src-highlite.git - src-highlite

index : src-highlite.git
src-highlite
summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.cvsignore 1
-rw-r--r--src/Makefile.am 41
-rw-r--r--src/asserttest.h 5
-rw-r--r--src/changelog.lang 12
-rw-r--r--src/check-regexp.cpp 120
-rw-r--r--src/check-regexp_cmd.c 293
-rw-r--r--src/check-regexp_cmd.ggo 12
-rw-r--r--src/check-regexp_cmd.h 62
-rw-r--r--src/cmdline.c (renamed from src/lib/cmdline.c)2
-rw-r--r--src/cmdline.ggo (renamed from src/lib/cmdline.ggo)30
-rw-r--r--src/cmdline.h (renamed from src/lib/cmdline.h)2
-rw-r--r--src/cmdlineargs.h (renamed from src/lib/cmdlineargs.h)0
-rw-r--r--src/cpp.lang 3
-rw-r--r--src/default.css 1
-rw-r--r--src/default.style 1
-rw-r--r--src/docbook.outlang 5
-rw-r--r--src/docbookdoc.outlang 14
-rw-r--r--src/esc.style 1
-rw-r--r--src/html.lang 4
-rw-r--r--src/html_notfixed.outlang 6
-rw-r--r--src/java.lang 2
-rw-r--r--src/lang.map 4
-rw-r--r--src/langdef.lang 5
-rw-r--r--src/lib/.cvsignore 3
-rw-r--r--src/lib/Makefile.am 59
-rw-r--r--src/lib/delimitedlangelem.cpp 7
-rw-r--r--src/lib/delimitedlangelem.h 2
-rw-r--r--src/lib/fileutil.cc 5
-rw-r--r--src/lib/generatorfactory.cc 180
-rw-r--r--src/lib/generatorfactory.h 9
-rw-r--r--src/lib/generatormap.cpp 154
-rw-r--r--src/lib/generatormap.h 86
-rw-r--r--src/lib/keys.h 11
-rw-r--r--src/lib/langdefparser.yy 72
-rw-r--r--src/lib/langdefscanner.ll 36
-rw-r--r--src/lib/langelem.cpp 30
-rw-r--r--src/lib/langelem.h 13
-rw-r--r--src/lib/langelems.cpp 6
-rw-r--r--src/lib/langelems.h 1
-rw-r--r--src/lib/languageinfer.cpp 16
-rw-r--r--src/lib/linebuffer.cpp 26
-rw-r--r--src/lib/linebuffer.h 6
-rw-r--r--src/lib/lineoutputgenerator.cpp 74
-rw-r--r--src/lib/lineoutputgenerator.h 48
-rw-r--r--src/lib/maingeneratormap.cpp 14
-rw-r--r--src/lib/maingeneratormap.h 19
-rw-r--r--src/lib/messages.cc 33
-rw-r--r--src/lib/messages.h 7
-rw-r--r--src/lib/my_set.h 16
-rw-r--r--src/lib/my_string.h 11
-rw-r--r--src/lib/namedsubexpslangelem.cpp 47
-rw-r--r--src/lib/namedsubexpslangelem.h 50
-rw-r--r--src/lib/outlangdefparser.yy 31
-rw-r--r--src/lib/outputgenerator.cpp 94
-rw-r--r--src/lib/outputgenerator.h 32
-rw-r--r--src/lib/parserinfo.h 2
-rw-r--r--src/lib/parsestyles.h 2
-rw-r--r--src/lib/readtags.c 2
-rw-r--r--src/lib/readtags.h 2
-rw-r--r--src/lib/refgeneratormap.cpp 214
-rw-r--r--src/lib/refgeneratormap.h 49
-rw-r--r--src/lib/refposition.h 8
-rw-r--r--src/lib/regexpengine.cpp 265
-rw-r--r--src/lib/regexpengine.h (renamed from src/regexpengine.h)29
-rw-r--r--src/lib/regexpenginedebug.cpp (renamed from src/regexpenginedebug.cpp)34
-rw-r--r--src/lib/regexpenginedebug.h (renamed from src/regexpenginedebug.h)4
-rw-r--r--src/lib/regexpreprocessor.cpp 214
-rw-r--r--src/lib/regexpreprocessor.h 86
-rw-r--r--src/lib/regexpstate.cpp 207
-rw-r--r--src/lib/regexpstate.h 191
-rw-r--r--src/lib/regexpstatebuilder.H 20
-rw-r--r--src/lib/regexpstatebuilder.cpp 142
-rw-r--r--src/lib/regexpstatebuilder.h 9
-rw-r--r--src/lib/regexpstatebuilder_dbtab.cc 8
-rw-r--r--src/lib/regexpstateprinter.cpp 52
-rw-r--r--src/lib/statelangelem.cpp 8
-rw-r--r--src/lib/statelangelem.h 2
-rw-r--r--src/lib/stringdef.cpp 28
-rw-r--r--src/lib/stringdef.h 69
-rw-r--r--src/lib/stringlistlangelem.cpp 8
-rw-r--r--src/lib/stringlistlangelem.h 2
-rw-r--r--src/lib/stylecssparser.yy 23
-rw-r--r--src/lib/stylecssscanner.ll 11
-rw-r--r--src/lib/styleparser.yy 35
-rw-r--r--src/lib/stylescanner.ll 14
-rw-r--r--src/lib/test_langinfer.cpp 4
-rw-r--r--src/lib/test_regexpreprocessor_main.cpp 167
-rw-r--r--src/lib/textformatter.cpp 332
-rw-r--r--src/lib/textformatter.h 120
-rw-r--r--src/lib/tostringcollection.h 60
-rw-r--r--src/log.lang 4
-rw-r--r--src/logtalk.lang 6
-rw-r--r--src/outlang.map 3
-rw-r--r--src/perl.lang 102
-rw-r--r--src/postscript.lang 4
-rw-r--r--src/regexpengine.cpp 177
-rw-r--r--src/regexpstate.cpp 83
-rw-r--r--src/regexpstate.h 78
-rw-r--r--src/ruby.lang 13
-rw-r--r--src/startapp.cc 1104
-rw-r--r--src/startapp.h 2
-rw-r--r--src/texinfo.style 1
-rw-r--r--src/xhtml_notfixed.outlang 16
-rw-r--r--src/xhtmltable.outlang 11
104 files changed, 3603 insertions, 2248 deletions
diff --git a/src/.cvsignore b/src/.cvsignore
index ab0c685..b5caf54 100644
--- a/src/.cvsignore
+++ b/src/.cvsignore
@@ -26,3 +26,4 @@ cpp2html.cc.html
cpp2html.h.html
src-hilite-lesspipe.sh
libsh.a
+check-regexp
\ No newline at end of file
diff --git a/src/Makefile.am b/src/Makefile.am
index e6ca190..ffed6b7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright (C) 1999, 2000, Lorenzo Bettini <http://www.lorenzobettini.it>
+# Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it>
#
# This file is free software; as a special exception the author gives
# unlimited permission to copy and/or distribute it, with or without
@@ -8,10 +8,24 @@
# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-bin_PROGRAMS = source-highlight
+SUFFIXES = .ggo
+
+if NO_GENGETOPT
+.ggo.c:
+ touch $@
+else
+
+check-regexp_cmd.c: $(srcdir)/check-regexp_cmd.ggo
+ $(GENGETOPT) --input $(srcdir)/check-regexp_cmd.ggo --unamed-opts -F check-regexp_cmd
+
+.ggo.c:
+ $(GENGETOPT) --input $< --unamed-opts --no-handle-help --no-handle-version
+endif
+
+
+bin_PROGRAMS = source-highlight check-regexp
bin_SCRIPTS = java2html cpp2html src-hilite-lesspipe.sh
-noinst_LTLIBRARIES = libsh.la
-libsh_la_LIBADD = $(BOOST_REGEX_LIB) $(LDADD) lib/libcommon.la
+
EXTRA_PROGRAMS = source-highlight-cgi
SRCHILITE = $(top_builddir)/src/source-highlight$(EXEEXT)
@@ -24,8 +38,10 @@ AM_CPPFLAGS = -I$(top_srcdir)/gl -I$(top_builddir)/gl
LDADD = @LEXLIB@ $(top_builddir)/gl/libgnu.la
-source_highlight_LDADD = libsh.la
-source_highlight_cgi_LDADD = libsh.la
+source_highlight_LDADD = $(BOOST_REGEX_LIB) $(LDADD) lib/libcommon.la
+source_highlight_cgi_LDADD = $(source_highlight_LDADD)
+
+check_regexp_LDADD = $(BOOST_REGEX_LIB) $(top_builddir)/gl/libgnu.la
SUBDIRS = \
lib \
@@ -35,12 +51,11 @@ DIST_SUBDIRS = \
lib \
includes
-libsh_la_SOURCES = source-highlight.cc regexpengine.cpp regexpstate.cpp regexpenginedebug.cpp
-
# for most rules, we use one file per line. `diffs' are more clear this way
-source_highlight_SOURCES = startapp.cc
+source_highlight_SOURCES = cmdline.c startapp.cc source-highlight.cc
+check_regexp_SOURCES = check-regexp_cmd.c check-regexp.cpp
-source_highlight_cgi_SOURCES = startapp-cgi.cc envmapper.c
+source_highlight_cgi_SOURCES = cmdline.c startapp-cgi.cc envmapper.c
# files that we don't want automake/autoconf to touch ever.
# just stick them in the distribution as-is
@@ -81,10 +96,12 @@ xhtml_common.outlang \
xhtmlcss.outlang \
xhtmldoc.outlang \
xhtml.outlang \
+xhtml_notfixed.outlang \
xhtmltable.outlang \
texinfo.outlang \
javadoc.outlang \
-docbook.outlang
+docbook.outlang \
+docbookdoc.outlang
STYLEFILES = texinfo.style esc.style
@@ -111,4 +128,4 @@ tags:
.PHONY: tags
-noinst_HEADERS = regexpengine.h regexpstate.h asserttest.h regexpenginedebug.h
+noinst_HEADERS = asserttest.h cmdline.h cmdlineargs.h cmdline.ggo check-regexp_cmd.h check-regexp_cmd.ggo
diff --git a/src/asserttest.h b/src/asserttest.h
index 7b54235..c3275a2 100644
--- a/src/asserttest.h
+++ b/src/asserttest.h
@@ -15,8 +15,9 @@
#include <iostream>
#include <stdlib.h>
+template <typename T>
int
-assertEquals(const std::string &expected, const std::string &actual)
+assertEquals(T expected, T actual)
{
if (expected != actual) {
std::cerr << "assertEquals failed" << std::endl;
@@ -30,7 +31,7 @@ assertEquals(const std::string &expected, const std::string &actual)
}
int
-assertEquals(bool expected, bool actual)
+assertEquals(const std::string &expected, const std::string &actual)
{
if (expected != actual) {
std::cerr << "assertEquals failed" << std::endl;
diff --git a/src/changelog.lang b/src/changelog.lang
index 828bf26..0a1fb37 100644
--- a/src/changelog.lang
+++ b/src/changelog.lang
@@ -3,14 +3,6 @@ state date start '[[:digit:]]{2,4}-?[[:digit:]]{2}-?[[:digit:]]{2}' begin
name = '([[:word:]]|[[:punct:]])+'
end
-state symbol start '^(?:[\t]+|[[:space:]]+)\*[[:space:]]+' begin
- state file start '[^:]+\:' begin
- normal start '.'
- end
-end
+(normal,symbol,normal,file)= `(^[[:blank:]]+)(\*)([[:blank:]]+)((?:[^:]+\:)?)`
+(normal,file)= `(^[[:blank:]]+)((?:[^:]+\:)?)`
-state normal start '^(?:[\t]+|[[:space:]]+)' begin
- state file start '[^:]+\:' begin
- normal start '.'
- end
-end
diff --git a/src/check-regexp.cpp b/src/check-regexp.cpp
new file mode 100644
index 0000000..1808381
--- /dev/null
+++ b/src/check-regexp.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2007 Lorenzo Bettini <http://www.lorenzobettini.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * This program is part of GNU source-highlight; it simply checks
+ * a regular expression against one or more given strings
+ */
+
+/*
+ * some examples:
+ *
+
+ ./check-regexp "([^[:alnum:]]+)[^[:blank:]]*(\1)[^[:blank:]]*(\1)" "|w|\$|e|d| ^w^w^ ?a?b?"
+ trying to match: |w|$|e|d| ^w^w^ ?a?b?
+ against : ([^[:alnum:]]+)[^[:blank:]]*(\1)[^[:blank:]]*(\1)
+ what[0]: |w|$|e|d|
+ what[1]: |
+ what[2]: |
+ what[3]: |
+ prefix:
+ what[0]: ^w^w^
+ what[1]: ^
+ what[2]: ^
+ what[3]: ^
+ prefix:
+ what[0]: ?a?b?
+ what[1]: ?
+ what[2]: ?
+ what[3]: ?
+ total number of matches: 3
+
+ *
+ */
+
+#include <cstdlib>
+#include <boost/regex.hpp>
+#include <iostream>
+
+#include "check-regexp_cmd.h"
+
+using namespace std;
+
+int main(int argc, char * argv[]) {
+ gengetopt_args_info args_info; // command line structure
+
+ if (cmdline_parser(argc, argv, &args_info)!= 0)
+ // call the cmdline parser; a non-zero return value means the user gave bad args
+ return EXIT_FAILURE;
+
+ if (args_info.inputs_num < 2) {
+ cerr << "Syntax: check-regexp <regular expression> <expressions...>" << endl;
+ exit(EXIT_FAILURE);
+ }
+
+ boost::regex regex(args_info.inputs[0]);
+
+ for (unsigned int i = 1; i < args_info.inputs_num; ++i) {
+ string tomatch = args_info.inputs[i];
+
+ std::string::const_iterator start, end;
+ boost::match_results<std::string::const_iterator> what;
+ boost::match_flag_type flags;
+
+ start = tomatch.begin();
+ end = tomatch.end();
+ flags = boost::match_default;
+
+ cout << "\nsearching : " << tomatch << endl;
+ cout << "for the regexp : " << regex << endl;
+
+ int num_of_matches = 0;
+
+ while (boost::regex_search(start, end, what, regex, flags)) {
+ string prefix = what.prefix();
+ if (prefix.size())
+ cout << "prefix: " << prefix << endl;
+
+ cout << "what[0]: " << what[0] << endl;
+
+ for (unsigned int i = 1; i < what.size(); ++i) {
+ if (what[i].matched) {
+ cout << " what[" << i << "]: "<< what[i] << endl;
+
+ cout << " length: " << what[i].length()<< endl;
+ }
+ }
+
+ string suffix = what.suffix();
+ if (suffix.size())
+ cout << "suffix: " << suffix << endl;
+
+ // update search position:
+ start = what[0].second;
+ // update flags:
+ flags |= boost::match_prev_avail;
+ flags |= boost::match_not_bob;
+
+ ++num_of_matches;
+ }
+
+ cout << "total number of matches: " << num_of_matches << endl;
+ }
+}
+
diff --git a/src/check-regexp_cmd.c b/src/check-regexp_cmd.c
new file mode 100644
index 0000000..f20ab59
--- /dev/null
+++ b/src/check-regexp_cmd.c
@@ -0,0 +1,293 @@
+/*
+ File autogenerated by gengetopt version 2.19.2
+ generated with the following command:
+ /home/bettini/usr/local/bin/gengetopt --input ./check-regexp_cmd.ggo --unamed-opts -F check-regexp_cmd
+
+ The developers of gengetopt consider the fixed text that goes in all
+ gengetopt output files to be in the public domain:
+ we make no copyright claims on it.
+*/
+
+/* If we use autoconf. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "getopt.h"
+
+#include "check-regexp_cmd.h"
+
+const char *gengetopt_args_info_purpose = "Tries to match the regular expression passed as the first argument\nagainst the strings passed as remaining arguments";
+
+const char *gengetopt_args_info_usage = "Usage: check-regexp 'regular expression' 'string1' 'string2' ...";
+
+const char *gengetopt_args_info_description = "";
+
+const char *gengetopt_args_info_help[] = {
+ " -h, --help Print help and exit",
+ " -V, --version Print version and exit",
+ 0
+};
+
+static
+void clear_given (struct gengetopt_args_info *args_info);
+static
+void clear_args (struct gengetopt_args_info *args_info);
+
+static int
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required, const char *additional_error);
+
+
+static char *
+gengetopt_strdup (const char *s);
+
+static
+void clear_given (struct gengetopt_args_info *args_info)
+{
+ args_info->help_given = 0 ;
+ args_info->version_given = 0 ;
+}
+
+static
+void clear_args (struct gengetopt_args_info *args_info)
+{
+
+}
+
+static
+void init_args_info(struct gengetopt_args_info *args_info)
+{
+ args_info->help_help = gengetopt_args_info_help[0] ;
+ args_info->version_help = gengetopt_args_info_help[1] ;
+
+}
+
+void
+cmdline_parser_print_version (void)
+{
+ printf ("%s %s\n", CMDLINE_PARSER_PACKAGE, CMDLINE_PARSER_VERSION);
+}
+
+void
+cmdline_parser_print_help (void)
+{
+ int i = 0;
+ cmdline_parser_print_version ();
+
+ if (strlen(gengetopt_args_info_purpose) > 0)
+ printf("\n%s\n", gengetopt_args_info_purpose);
+
+ printf("\n%s\n\n", gengetopt_args_info_usage);
+
+ if (strlen(gengetopt_args_info_description) > 0)
+ printf("%s\n", gengetopt_args_info_description);
+
+ while (gengetopt_args_info_help[i])
+ printf("%s\n", gengetopt_args_info_help[i++]);
+}
+
+void
+cmdline_parser_init (struct gengetopt_args_info *args_info)
+{
+ clear_given (args_info);
+ clear_args (args_info);
+ init_args_info (args_info);
+
+ args_info->inputs = NULL;
+ args_info->inputs_num = 0;
+}
+
+static void
+cmdline_parser_release (struct gengetopt_args_info *args_info)
+{
+
+ unsigned int i;
+
+ for (i = 0; i < args_info->inputs_num; ++i)
+ free (args_info->inputs [i]);
+
+ if (args_info->inputs_num)
+ free (args_info->inputs);
+
+ clear_given (args_info);
+}
+
+int
+cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info)
+{
+ FILE *outfile;
+ int i = 0;
+
+ outfile = fopen(filename, "w");
+
+ if (!outfile)
+ {
+ fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename);
+ return EXIT_FAILURE;
+ }
+
+ if (args_info->help_given) {
+ fprintf(outfile, "%s\n", "help");
+ }
+ if (args_info->version_given) {
+ fprintf(outfile, "%s\n", "version");
+ }
+
+ fclose (outfile);
+
+ i = EXIT_SUCCESS;
+ return i;
+}
+
+void
+cmdline_parser_free (struct gengetopt_args_info *args_info)
+{
+ cmdline_parser_release (args_info);
+}
+
+
+/* gengetopt_strdup() */
+/* strdup.c replacement of strdup, which is not standard */
+char *
+gengetopt_strdup (const char *s)
+{
+ char *result = NULL;
+ if (!s)
+ return result;
+
+ result = (char*)malloc(strlen(s) + 1);
+ if (result == (char*)0)
+ return (char*)0;
+ strcpy(result, s);
+ return result;
+}
+
+int
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+{
+ return cmdline_parser2 (argc, argv, args_info, 0, 1, 1);
+}
+
+int
+cmdline_parser2 (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required)
+{
+ int result;
+
+ result = cmdline_parser_internal (argc, argv, args_info, override, initialize, check_required, NULL);
+
+ if (result == EXIT_FAILURE)
+ {
+ cmdline_parser_free (args_info);
+ exit (EXIT_FAILURE);
+ }
+
+ return result;
+}
+
+int
+cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name)
+{
+ return EXIT_SUCCESS;
+}
+
+int
+cmdline_parser_internal (int argc, char * const *argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required, const char *additional_error)
+{
+ int c; /* Character of the parsed option. */
+
+ int error = 0;
+ struct gengetopt_args_info local_args_info;
+
+ if (initialize)
+ cmdline_parser_init (args_info);
+
+ cmdline_parser_init (&local_args_info);
+
+ optarg = 0;
+ optind = 0;
+ opterr = 1;
+ optopt = '?';
+
+ while (1)
+ {
+ int option_index = 0;
+ char *stop_char;
+
+ static struct option long_options[] = {
+ { "help", 0, NULL, 'h' },
+ { "version", 0, NULL, 'V' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ stop_char = 0;
+ c = getopt_long (argc, argv, "hV", long_options, &option_index);
+
+ if (c == -1) break; /* Exit from `while (1)' loop. */
+
+ switch (c)
+ {
+ case 'h': /* Print help and exit. */
+ cmdline_parser_print_help ();
+ cmdline_parser_free (&local_args_info);
+ exit (EXIT_SUCCESS);
+
+ case 'V': /* Print version and exit. */
+ cmdline_parser_print_version ();
+ cmdline_parser_free (&local_args_info);
+ exit (EXIT_SUCCESS);
+
+
+ case 0: /* Long option with no short option */
+ case '?': /* Invalid option. */
+ /* `getopt_long' already printed an error message. */
+ goto failure;
+
+ default: /* bug: option not considered. */
+ fprintf (stderr, "%s: option unknown: %c%s\n", CMDLINE_PARSER_PACKAGE, c, (additional_error ? additional_error : ""));
+ abort ();
+ } /* switch */
+ } /* while */
+
+
+
+
+ cmdline_parser_release (&local_args_info);
+
+ if ( error )
+ return (EXIT_FAILURE);
+
+ if (optind < argc)
+ {
+ int i = 0 ;
+ int found_prog_name = 0;
+ /* whether program name, i.e., argv[0], is in the remaining args
+ (this may happen with some implementations of getopt,
+ but surely not with the one included by gengetopt) */
+
+ i = optind;
+ while (i < argc)
+ if (argv[i++] == argv[0]) {
+ found_prog_name = 1;
+ break;
+ }
+ i = 0;
+
+ args_info->inputs_num = argc - optind - found_prog_name;
+ args_info->inputs =
+ (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ;
+ while (optind < argc)
+ if (argv[optind++] != argv[0])
+ args_info->inputs[ i++ ] = gengetopt_strdup (argv[optind-1]) ;
+ }
+
+ return 0;
+
+failure:
+
+ cmdline_parser_release (&local_args_info);
+ return (EXIT_FAILURE);
+}
diff --git a/src/check-regexp_cmd.ggo b/src/check-regexp_cmd.ggo
new file mode 100644
index 0000000..6ddefb2
--- /dev/null
+++ b/src/check-regexp_cmd.ggo
@@ -0,0 +1,12 @@
+# Copyright (C) 1999-2007 Lorenzo Bettini, http://www.lorenzobettini.it
+
+# This file is used by gengetopt to generate a command line args parser
+# GNU gengetopt can be found at
+# http://www.gnu.org/software/gengetopt
+
+package "check-regexp (GNU Source-highlight)"
+
+purpose "Tries to match the regular expression passed as the first argument
+against the strings passed as remaining arguments"
+
+usage "check-regexp 'regular expression' 'string1' 'string2' ..."
diff --git a/src/check-regexp_cmd.h b/src/check-regexp_cmd.h
new file mode 100644
index 0000000..424ef12
--- /dev/null
+++ b/src/check-regexp_cmd.h
@@ -0,0 +1,62 @@
+/* check-regexp_cmd.h */
+
+/* File autogenerated by gengetopt version 2.19.2 */
+
+#ifndef CHECK_REGEXP_CMD_H
+#define CHECK_REGEXP_CMD_H
+
+/* If we use autoconf. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef CMDLINE_PARSER_PACKAGE
+#define CMDLINE_PARSER_PACKAGE "check-regexp (GNU Source-highlight)"
+#endif
+
+#ifndef CMDLINE_PARSER_VERSION
+#define CMDLINE_PARSER_VERSION VERSION
+#endif
+
+struct gengetopt_args_info
+{
+ const char *help_help; /* Print help and exit help description. */
+ const char *version_help; /* Print version and exit help description. */
+
+ int help_given ; /* Whether help was given. */
+ int version_given ; /* Whether version was given. */
+
+ char **inputs ; /* unamed options */
+ unsigned inputs_num ; /* unamed options number */
+} ;
+
+extern const char *gengetopt_args_info_purpose;
+extern const char *gengetopt_args_info_usage;
+extern const char *gengetopt_args_info_help[];
+
+int cmdline_parser (int argc, char * const *argv,
+ struct gengetopt_args_info *args_info);
+int cmdline_parser2 (int argc, char * const *argv,
+ struct gengetopt_args_info *args_info,
+ int override, int initialize, int check_required);
+int cmdline_parser_file_save(const char *filename,
+ struct gengetopt_args_info *args_info);
+
+void cmdline_parser_print_help(void);
+void cmdline_parser_print_version(void);
+
+void cmdline_parser_init (struct gengetopt_args_info *args_info);
+void cmdline_parser_free (struct gengetopt_args_info *args_info);
+
+int cmdline_parser_required (struct gengetopt_args_info *args_info,
+ const char *prog_name);
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* CHECK_REGEXP_CMD_H */
diff --git a/src/lib/cmdline.c b/src/cmdline.c
index a471a23..fdc0f2c 100644
--- a/src/lib/cmdline.c
+++ b/src/cmdline.c
@@ -1,5 +1,5 @@
/*
- File autogenerated by gengetopt version 2.19.1
+ File autogenerated by gengetopt version 2.19.2
generated with the following command:
/home/bettini/usr/local/bin/gengetopt --input cmdline.ggo --unamed-opts --no-handle-help --no-handle-version
diff --git a/src/lib/cmdline.ggo b/src/cmdline.ggo
index e855554..25bac44 100644
--- a/src/lib/cmdline.ggo
+++ b/src/cmdline.ggo
@@ -1,37 +1,9 @@
-# Copyright (C) 1999, 2000 Lorenzo Bettini, http://www.lorenzobettini.it
+# Copyright (C) 1999-2007 Lorenzo Bettini, http://www.lorenzobettini.it
# This file is used by gengetopt to generate a command line args parser
# GNU gengetopt can be found at
# http://www.gnu.org/software/gengetopt
-# Specification file format:
-#
-# This file consist in lines of sentences with the following format:
-#
-# option <long> <short> <desc> <argtype> <required>
-# option <long> <short> <desc> flag <onoff>
-# option <long> <short> <desc> no
-# ... # ...
-#
-# Where:
-#
-# <packname> = Double quoted string with upper and lower case chars, digits,
-# '-' and '.'. No spaces allowed.
-# <version> = Double quoted string with upper and lower case chars, digits,
-# '-' and '.'. No spaces allowed.
-# <long> = Double quoted string with upper and lower case chars, digits,
-# '-' and '.'. No spaces allowed.
-# <short> = A single upper or lower case char, or a digit.
-# <desc> = String with upper and lower case chars, digits, '-', '.' and
-# spaces. First character must be no space.
-# <argtype> = string, int, short, long, float, double, longdouble or longlong.
-# <required> = yes or no.
-# <onoff> = on or off.
-# Comments begins with '#' in any place of the line and ends in the
-# end of line.
-# The third form of option is used if the option does not take an argument;
-# it must not be required.
-
purpose "Highlight the syntax of a source file (e.g. Java) into a specific format (e.g. HTML)"
# Options
diff --git a/src/lib/cmdline.h b/src/cmdline.h
index cc28f14..3a0b158 100644
--- a/src/lib/cmdline.h
+++ b/src/cmdline.h
@@ -1,6 +1,6 @@
/* cmdline.h */
-/* File autogenerated by gengetopt version 2.19.1 */
+/* File autogenerated by gengetopt version 2.19.2 */
#ifndef CMDLINE_H
#define CMDLINE_H
diff --git a/src/lib/cmdlineargs.h b/src/cmdlineargs.h
index 10ffe45..10ffe45 100644
--- a/src/lib/cmdlineargs.h
+++ b/src/cmdlineargs.h
diff --git a/src/cpp.lang b/src/cpp.lang
index 8a59c08..90a5e87 100644
--- a/src/cpp.lang
+++ b/src/cpp.lang
@@ -13,6 +13,9 @@ include "number.lang"
include "c_string.lang"
+(keyword,normal,type) =
+ `(\<(?:class|struct|typename))([[:blank:]]+)([[:alnum:]]+)`
+
keyword = "__asm|__cdecl|__declspec|__export|__far16",
"__fastcall|__fortran|__import",
"__pascal|__rtti|__stdcall|_asm|_cdecl",
diff --git a/src/default.css b/src/default.css
index e08e98b..52a320b 100644
--- a/src/default.css
+++ b/src/default.css
@@ -3,6 +3,7 @@ body { background-color: white; }
.keyword { color: blue; font-weight: bold; }
.type { color: darkgreen; }
.string { color: red; font-family: monospace; }
+.regexp { color: orange; }
.specialchar { color: pink; font-family: monospace; }
.comment { color: brown; font-style: italic; }
.number { color: purple; }
diff --git a/src/default.style b/src/default.style
index 5ade28d..14d5117 100644
--- a/src/default.style
+++ b/src/default.style
@@ -3,6 +3,7 @@ bgcolor "white"; // the background color for documents
keyword blue b ; // for language keywords
type darkgreen ; // for basic types
string red f ; // for strings and chars
+regexp orange f ; // for regular expressions
specialchar pink f ; // for special chars, e.g., \n, \t, \\
comment brown i, noref; // for comments
number purple ; // for literal numbers
diff --git a/src/docbook.outlang b/src/docbook.outlang
index 923eae0..0ed7a62 100644
--- a/src/docbook.outlang
+++ b/src/docbook.outlang
@@ -6,6 +6,11 @@ extension "xml"
bold "<emphasis role=\"strong\">$text</emphasis>"
italics "<emphasis>$text</emphasis>"
+anchor "<anchor id=\"line$linenum\"/>$text"
+postline_reference "<link linkend='line$linenum'>$text -> $linenum</link>"
+postdoc_reference "<link linkend='line$linenum'>$text -> $linenum</link>"
+reference "<link linkend='line$linenum'>$text</link>"
+
translations
"&" "&amp;"
"<" "&lt;"
diff --git a/src/docbookdoc.outlang b/src/docbookdoc.outlang
new file mode 100644
index 0000000..cb1f446
--- /dev/null
+++ b/src/docbookdoc.outlang
@@ -0,0 +1,14 @@
+include "docbook.outlang"
+
+doctemplate
+"<!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook//EN\">
+<article>
+<articleinfo>
+<title>prova</title>
+</articleinfo>
+<programlisting>"
+"</programlisting>
+</article>
+"
+end
+
diff --git a/src/esc.style b/src/esc.style
index 8c00bda..8a0aebd 100644
--- a/src/esc.style
+++ b/src/esc.style
@@ -1,6 +1,7 @@
keyword blue b ;
type darkgreen ;
string red ;
+regexp orange ;
specialchar pink ;
comment cyan i ;
number purple ;
diff --git a/src/html.lang b/src/html.lang
index af706ca..2893308 100644
--- a/src/html.lang
+++ b/src/html.lang
@@ -4,9 +4,9 @@ end
comment delim "<!--" "-->" multiline nested
-keyword = '<(/)?[[:alnum:]]+(/)?>'
+keyword = '<(/)?[[:alpha:]][[:alnum:]]*(/)?>'
-state keyword delim '<(/)?[[:alnum:]]+' '(/)?>' multiline begin
+state keyword delim '<(/)?[[:alpha:]][[:alnum:]]*' '(/)?>' multiline begin
type = '[^="[:blank:]>]+'
symbol = "="
string delim "\"" "\"" escape "\\" multiline
diff --git a/src/html_notfixed.outlang b/src/html_notfixed.outlang
index 4b5b49c..c6af7b0 100644
--- a/src/html_notfixed.outlang
+++ b/src/html_notfixed.outlang
@@ -2,6 +2,12 @@ include "html_common.outlang"
fixed "<tt>$text</tt>"
+doctemplate
+"<!-- Generator: $additional -->
+$header"
+"$footer"
+end
+
translations
"\n" "<br>\n"
" " "&nbsp; "
diff --git a/src/java.lang b/src/java.lang
index 742fac1..63371ca 100644
--- a/src/java.lang
+++ b/src/java.lang
@@ -6,6 +6,8 @@ include "number.lang"
include "c_string.lang"
+(keyword,normal,type) = `(\<(?:class|interface))([[:blank:]]+)([$[:alnum:]]+)`
+
keyword = "abstract|assert|break|case|catch|class|const",
"continue|default|do|else|extends|false|final",
"finally|for|goto|if|implements|instanceof|interface"
diff --git a/src/lang.map b/src/lang.map
index d15938b..efae070 100644
--- a/src/lang.map
+++ b/src/lang.map
@@ -38,8 +38,8 @@ caml = caml.lang
mli = caml.lang
sml = sml.lang
sig = sml.lang
-syslog = syslog.lang
-log = syslog.lang
+syslog = log.lang
+log = log.lang
pas = pascal.lang
pascal = pascal.lang
fortran = fortran.lang
diff --git a/src/langdef.lang b/src/langdef.lang
index 2229f23..ade37f6 100644
--- a/src/langdef.lang
+++ b/src/langdef.lang
@@ -5,13 +5,14 @@ comment start "#"
preproc = "include"
string delim "\"" "\"" escape "\\" multiline
-string delim "'" "'" escape "\\" multiline
+regexp delim "'" "'" escape "\\" multiline
+regexp delim "`" "`" escape "\\" multiline
keyword = "state|environment|begin|end|delim|escape|start",
"multiline|nested|vardef|exitall|exit",
"redef|subst|nonsensitive"
-symbol = "=|+|,"
+symbol = "=|+|,|(|)"
vardef ID = '[[:word:]]+'
diff --git a/src/lib/.cvsignore b/src/lib/.cvsignore
index f661af7..57922c9 100644
--- a/src/lib/.cvsignore
+++ b/src/lib/.cvsignore
@@ -52,4 +52,5 @@ test_readtags
test_langinfer
mytags
copyright.c
-reportbugs.c
\ No newline at end of file
+reportbugs.c
+test_regexpreprocessor
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 2aa586e..cc904ae 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2006 Lorenzo Bettini <http://www.lorenzobettini.it>
+# Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it>
#
# This file is free software; as a special exception the author gives
# unlimited permission to copy and/or distribute it, with or without
@@ -8,19 +8,11 @@
# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-SUFFIXES = .text .ggo
+SUFFIXES = .text
.text.c:
$(TXTC) -c $<
-if NO_GENGETOPT
-.ggo.c:
- touch $@
-else
-.ggo.c:
- $(GENGETOPT) --input $< --unamed-opts --no-handle-help --no-handle-version
-endif
-
if NO_DOUBLECPP
$(srcdir)/%.H: $(srcdir)/%.h
touch $@
@@ -36,19 +28,22 @@ LDADD = @LEXLIB@ $(top_builddir)/gl/libgnu.la
noinst_LTLIBRARIES = libcommon.la
-libcommon_la_SOURCES = copyright.text reportbugs.text cmdline.c messages.cc \
+libcommon_la_SOURCES = copyright.text reportbugs.text messages.cc \
fileutil.cc chartranslator.cc generatorfactory.cc docgenerator.cc styleparser.yy \
stylescanner.ll langdefparser.yy langdefscanner.ll outlangdefparser.yy \
stylecssparser.yy stylecssscanner.ll outlangdefscanner.ll srcuntabifier.cpp \
- generatormap.cpp maingeneratormap.cpp outputbuffer.cpp outputgenerator.cpp \
- mainoutputbuffer.cpp lineoutputgenerator.cpp stringdef.cpp langelem.cpp statelangelem.cpp \
+ textformatter.cpp outputbuffer.cpp outputgenerator.cpp \
+ mainoutputbuffer.cpp statelangelem.cpp \
langelems.cpp regexpreprocessor.cpp regexpstatebuilder.H regexpstatebuilder_dbtab.cc \
regexpstatebuilder.cpp langdefloader.cpp langmap.cpp statestartlangelem.cpp \
- stringlistlangelem.cpp delimitedlangelem.cpp vardefinitions.cpp textformatter.cpp \
+ stringlistlangelem.cpp delimitedlangelem.cpp vardefinitions.cpp \
textstyle.cpp textgenerator.cpp textstylebuilder.cpp doctemplate.cpp substfun.cpp \
- refgeneratormap.cpp readtags.c fileinfo.cpp linebuffer.cpp preformatter.cpp \
+ readtags.c fileinfo.cpp preformatter.cpp \
regexpstateprinter.cpp langelemsprinter.cpp langelemsprinter.H langelemsprinter_dbtab.cc \
- languageinfer.cpp stopwatch.cpp utils.cpp utils.h
+ languageinfer.cpp stopwatch.cpp utils.cpp utils.h \
+ regexpengine.cpp regexpstate.cpp regexpenginedebug.cpp \
+ namedsubexpslangelem.cpp
+
libcommon_la_LIBADD = $(BOOST_REGEX_LIB)
INCLUDES = -I@top_srcdir@/src
@@ -56,12 +51,9 @@ INCLUDES = -I@top_srcdir@/src
EXTRA_DIST = colors.h generatorfactory.h \
keys.h styleparser.h stylecssparser.h \
tokens.h messages.h fileutil.h \
- cmdline.h \
- chartranslator.h my_set.h my_sstream.h my_string.h \
+ chartranslator.h my_sstream.h \
linenumdigit.h globalostream.h \
docgenerator.h \
- cmdlineargs.h \
- cmdline.ggo \
copyright.h reportbugs.h \
srcuntabifier.h colormap.h \
langdefparser.h outlangdefparser.h \
@@ -98,10 +90,16 @@ styleparser.cc styleparser.h: $(srcdir)/styleparser.yy
stylecssparser.cc stylecssparser.h: $(srcdir)/stylecssparser.yy
$(YACC) -p stylecsssc_ -o $@ $(srcdir)/stylecssparser.yy --defines=$*.h
-TESTS = test_textstyle test_textgenerator test_outlangparser test_readtags \
-test_langinfer
+TESTS = test_textstyle test_textgenerator test_outlangparser \
+test_langinfer test_regexpreprocessor
+
+check_PROGRAMS = test_langdefparser test_langmap test_textstyle test_textgenerator test_outlangparser test_langinfer test_regexpreprocessor
+
+if !NO_CTAGS
+TESTS += test_readtags
+check_PROGRAMS += test_readtags
+endif
-check_PROGRAMS = test_langdefparser test_langmap test_textstyle test_textgenerator test_outlangparser test_readtags test_langinfer
test_langdefparser_SOURCES = test_langdefparser_main.cpp
test_langdefparser_LDADD = libcommon.la
@@ -126,6 +124,9 @@ test_readtags_LDADD = readtags.$(OBJEXT)
test_langinfer_SOURCES = test_langinfer.cpp
test_langinfer_LDADD = libcommon.la
+test_regexpreprocessor_SOURCES = test_regexpreprocessor_main.cpp
+test_regexpreprocessor_LDADD = libcommon.la
+
mytags: $(srcdir)/test_readtags_main.cpp
$(CTAGS) --excmd=n --fields=+n -o mytags $(srcdir)/test_readtags_main.cpp $(srcdir)/readtags.h
@@ -139,14 +140,16 @@ MAINTAINERCLEANFILES = styleparser.cc styleparser.h stylescanner.cc \
langdefparser.cc langdefparser.h langdefscanner.cc \
outlangdefparser.cc outlangdefparser.h outlangdefscanner.cc
-noinst_HEADERS = parsestyles.h generatormap.h maingeneratormap.h outputbuffer.h \
- outputgenerator.h mainoutputbuffer.h lineoutputgenerator.h stringdef.h langelem.h \
+noinst_HEADERS = parsestyles.h textformatter.h outputbuffer.h \
+ outputgenerator.h mainoutputbuffer.h stringdef.h langelem.h \
statelangelem.h langelems.h langdefparserfun.h outlangdefparserfun.h \
tostringcollection.h regexpreprocessor.h regexpstatebuilder.h langdefloader.h \
langdefscanner.h outlangdefscanner.h parsestruct.h langmap.h statestartlangelem.h \
- stringlistlangelem.h delimitedlangelem.h vardefinitions.h textformatter.h textstyle.h \
+ stringlistlangelem.h delimitedlangelem.h vardefinitions.h textstyle.h \
textstyles.h textgenerator.h textstylebuilder.h doctemplate.h substfun.h \
- parserinfo.h refgeneratormap.h readtags.h fileinfo.h linebuffer.h preformatter.h \
- regexpstateprinter.h langelemsprinter.h languageinfer.h stopwatch.h stylekey.h
+ parserinfo.h readtags.h fileinfo.h linebuffer.h preformatter.h \
+ regexpstateprinter.h langelemsprinter.h languageinfer.h stopwatch.h stylekey.h \
+ regexpengine.h regexpstate.h regexpenginedebug.h \
+ namedsubexpslangelem.h refposition.h
diff --git a/src/lib/delimitedlangelem.cpp b/src/lib/delimitedlangelem.cpp
index 8cafd20..a11757f 100644
--- a/src/lib/delimitedlangelem.cpp
+++ b/src/lib/delimitedlangelem.cpp
@@ -38,4 +38,11 @@ DelimitedLangElem::toString() const
return res;
}
+const std::string
+DelimitedLangElem::toStringOriginal() const
+{
+ string res = StateStartLangElem::toString() + " " + start->toStringOriginal() + (end ? " " + end->toStringOriginal() : "");
+ return res;
+}
+
diff --git a/src/lib/delimitedlangelem.h b/src/lib/delimitedlangelem.h
index c1df355..dceae26 100644
--- a/src/lib/delimitedlangelem.h
+++ b/src/lib/delimitedlangelem.h
@@ -42,6 +42,8 @@ public:
virtual const std::string toString() const;
+ virtual const std::string toStringOriginal() const;
+
void set_escape(StringDef *e) { escape = e; }
StringDef *getStart() const { return start; }
diff --git a/src/lib/fileutil.cc b/src/lib/fileutil.cc
index 52c7386..24dcf48 100644
--- a/src/lib/fileutil.cc
+++ b/src/lib/fileutil.cc
@@ -17,11 +17,14 @@
*
*/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <iostream>
#include <fstream>
#include "fileutil.h"
-#include "my_string.h"
#include "messages.h" // for verbosity
using namespace std;
diff --git a/src/lib/generatorfactory.cc b/src/lib/generatorfactory.cc
index dcbb908..983f1be 100644
--- a/src/lib/generatorfactory.cc
+++ b/src/lib/generatorfactory.cc
@@ -25,117 +25,105 @@
#include "textgenerator.h"
#include "textstyles.h"
#include "textstylebuilder.h"
-#include "refgeneratormap.h"
-
-// global
-#include "maingeneratormap.h"
-
-GeneratorFactory::GeneratorFactory(TextStylesPtr tstyles,
- PreFormatter *pf,
- bool gen_ref,
- const string &_ctags_file, RefPosition position,
- bool optimizations) :
- textStyles(tstyles), preformatter(pf),
- generate_references(gen_ref),
- ctags_file(_ctags_file), refposition(position),
- noOptimizations(optimizations)
-{
- generatormap = createGeneratorMap();
- generatormap->setNoOptimizations(noOptimizations);
+#include "textformatter.h"
+
+GeneratorFactory::GeneratorFactory(TextStylesPtr tstyles, PreFormatter *pf,
+ bool gen_ref, const string &_ctags_file, RefPosition position,
+ bool optimizations) :
+ textStyles(tstyles), preformatter(pf), generate_references(gen_ref),
+ ctags_file(_ctags_file), refposition(position),
+ noOptimizations(optimizations) {
+ textformatter = createTextFormatter();
+ textformatter->setNoOptimizations(noOptimizations);
}
-GeneratorFactory::~ GeneratorFactory()
-{
- if (generatormap)
- delete generatormap;
+GeneratorFactory::~GeneratorFactory() {
+ if (textformatter)
+ delete textformatter;
}
-GeneratorMap *
-GeneratorFactory::createGeneratorMap()
-{
- if (generate_references)
- return new RefGeneratorMap(preformatter, ctags_file,
- textStyles->refstyle, refposition);
-
- return new GeneratorMap(preformatter);
+TextFormatter *GeneratorFactory::createTextFormatter() {
+ if (generate_references)
+ return new TextFormatter(preformatter, ctags_file,
+ textStyles->refstyle, refposition);
+ else
+ return new TextFormatter(preformatter);
}
-string GeneratorFactory::preprocessColor(const string &color)
-{
- if ( color[0] == '"' && color[color.size()-1] == '"')
- return color.substr(1, color.size()-2);
- else
- return textStyles->colorMap->getColor (color);
+string GeneratorFactory::preprocessColor(const string &color) {
+ if (color[0] == '"' && color[color.size()-1] == '"')
+ return color.substr(1, color.size()-2);
+ else
+ return textStyles->colorMap->getColor(color);
}
bool GeneratorFactory::createGenerator(const string &key, const string &color,
- const string &bgcolor, StyleConstantsPtr styleconstants)
-{
- if (generatormap->hasGenerator(key))
- return false;
-
- if (! textStyles->onestyle.empty()) {
- generatormap->addGenerator (key, new TextGenerator(textStyles->onestyle.subst_style(key)));
- return true;
- }
-
- TextStyleBuilder textStyleBuilder(textStyles->starting_template, textStyles->style_separator);
-
- textStyleBuilder.start();
-
- if (styleconstants.get()) {
- for (StyleConstantsIterator it = styleconstants->begin(); it != styleconstants->end(); ++it) {
- switch( *it ){
- case ISBOLD:
- textStyleBuilder.add(textStyles->bold);
- break;
- case ISITALIC:
- textStyleBuilder.add(textStyles->italics);
- break;
- case ISUNDERLINE:
- textStyleBuilder.add(textStyles->underline);
- break;
- case ISFIXED:
- textStyleBuilder.add(textStyles->fixed);
- break;
- case ISNOTFIXED:
- textStyleBuilder.add(textStyles->notfixed);
- break;
- case ISNOREF:
- generatormap->addNoReference(key);
- break;
- }
+ const string &bgcolor, StyleConstantsPtr styleconstants) {
+ if (textformatter->hasGenerator(key))
+ return false;
+
+ if (! textStyles->onestyle.empty()) {
+ textformatter->addGenerator(key, new TextGenerator(textStyles->onestyle.subst_style(key)));
+ return true;
+ }
+
+ TextStyleBuilder textStyleBuilder(textStyles->starting_template,
+ textStyles->style_separator);
+
+ textStyleBuilder.start();
+
+ if (styleconstants.get()) {
+ for (StyleConstantsIterator it = styleconstants->begin(); it != styleconstants->end(); ++it) {
+ switch (*it) {
+ case ISBOLD:
+ textStyleBuilder.add(textStyles->bold);
+ break;
+ case ISITALIC:
+ textStyleBuilder.add(textStyles->italics);
+ break;
+ case ISUNDERLINE:
+ textStyleBuilder.add(textStyles->underline);
+ break;
+ case ISFIXED:
+ textStyleBuilder.add(textStyles->fixed);
+ break;
+ case ISNOTFIXED:
+ textStyleBuilder.add(textStyles->notfixed);
+ break;
+ case ISNOREF:
+ textformatter->addNoReference(key);
+ break;
+ }
+ }
}
- }
- if ( color.size () ) {
- textStyleBuilder.add(textStyles->color.subst_style(preprocessColor(color)));
- }
+ if (color.size()) {
+ textStyleBuilder.add(textStyles->color.subst_style(preprocessColor(color)));
+ }
- if ( bgcolor.size () ) {
- textStyleBuilder.add(textStyles->bg_color.subst_style(preprocessColor(bgcolor)));
- }
+ if (bgcolor.size()) {
+ textStyleBuilder.add(textStyles->bg_color.subst_style(preprocessColor(bgcolor)));
+ }
- TextStyle style = textStyleBuilder.end();
+ TextStyle style = textStyleBuilder.end();
- generatormap->addGenerator(key, new TextGenerator(style));
- return true;
+ textformatter->addGenerator(key, new TextGenerator(style));
+ return true;
}
-void GeneratorFactory::addDefaultGenerator()
-{
- TextGenerator *defaultGenerator = generatormap->hasGenerator(NORMAL);
-
- if (!defaultGenerator) {
-
- if (textStyles->onestyle.empty())
- defaultGenerator = new TextGenerator();
- else
- defaultGenerator = new TextGenerator(textStyles->onestyle.subst_style(NORMAL));
-
- generatormap->addGenerator (NORMAL, defaultGenerator);
- }
-
- generatormap->setDefaultGenerator(defaultGenerator);
+void GeneratorFactory::addDefaultGenerator() {
+ TextGenerator *defaultGenerator = textformatter->hasGenerator(NORMAL);
+
+ if (!defaultGenerator) {
+
+ if (textStyles->onestyle.empty())
+ defaultGenerator = new TextGenerator();
+ else
+ defaultGenerator = new TextGenerator(textStyles->onestyle.subst_style(NORMAL));
+
+ textformatter->addGenerator(NORMAL, defaultGenerator);
+ }
+
+ textformatter->setDefaultGenerator(defaultGenerator);
}
diff --git a/src/lib/generatorfactory.h b/src/lib/generatorfactory.h
index b7da58a..2845e81 100644
--- a/src/lib/generatorfactory.h
+++ b/src/lib/generatorfactory.h
@@ -27,7 +27,7 @@
#include <boost/shared_ptr.hpp>
#include "textstyles.h"
-#include "refgeneratormap.h"
+#include "refposition.h"
typedef enum { ISBOLD=1, ISITALIC, ISUNDERLINE, ISFIXED, ISNOTFIXED, ISNOREF } StyleConstant;
typedef std::list<StyleConstant> StyleConstants;
@@ -38,6 +38,7 @@ using std::string;
class TextGenerator;
class PreFormatter;
+class TextFormatter;
class GeneratorFactory
{
@@ -52,8 +53,10 @@ class GeneratorFactory
RefPosition refposition;
/// whether to turn off optimizations for generating output (default false)
bool noOptimizations;
+ /// the main text formatter
+ TextFormatter *textformatter;
- GeneratorMap *createGeneratorMap();
+ TextFormatter *createTextFormatter();
public:
GeneratorFactory(TextStylesPtr tstyles, PreFormatter *pf,
@@ -90,6 +93,8 @@ class GeneratorFactory
* @return
*/
string preprocessColor(const string &color);
+
+ TextFormatter *getTextFormatter() { return textformatter; }
};
#endif // GENERATORFACTORY_H
diff --git a/src/lib/generatormap.cpp b/src/lib/generatormap.cpp
deleted file mode 100644
index c7660e6..0000000
--- a/src/lib/generatormap.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-//
-// C++ Implementation: generatormap
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-
-#include "generatormap.h"
-#include "textgenerator.h"
-#include "preformatter.h"
-#include "mainoutputbuffer.h"
-
-using namespace std;
-
-GeneratorMap::GeneratorMap(PreFormatter *pf) :
- default_generator (0),
- preformatter(pf), noOptimizations(false)
-{
-}
-
-GeneratorMap::~GeneratorMap()
-{
- for (MapType::const_iterator it = generatormap.begin(); it != generatormap.end(); ++it)
- delete it->second;
-}
-
-void
-GeneratorMap::setDefaultGenerator(TextGenerator *gen)
-{
- default_generator = gen;
-}
-
-void
-GeneratorMap::addGenerator(const std::string &elem, TextGenerator *gen)
-{
- generatormap[elem] = gen;
-}
-
-TextGenerator *GeneratorMap::hasGenerator(const string &elem)
-{
- MapType::const_iterator it = generatormap.find(elem);
- if (it == generatormap.end())
- return 0;
-
- return it->second;
-}
-
-TextGenerator *
-GeneratorMap::getGenerator(const string &elem)
-{
- MapType::const_iterator it = generatormap.find(elem);
- if (it == generatormap.end())
- {
- // create a copy of the prototype and substitute the style.
- TextGenerator *missing = new TextGenerator(*default_generator);
- missing->subst_style(elem);
-
- generatormap[elem] = missing;
- return missing;
- }
-
- return it->second;
-}
-
-void
-GeneratorMap::addNoReference(const std::string &elem)
-{
- noreferences.insert(elem);
-}
-
-bool
-GeneratorMap::isNoReference(const std::string &elem) const
-{
- return (noreferences.find(elem) != noreferences.end());
-}
-
-const string
-GeneratorMap::generateString( const std::string &elem, const std::string &s ,
- const FileInfo *p )
-{
- return getGenerator(elem)->generateEntire(preformatter->preformat(s));
-}
-
-void
-GeneratorMap::generateEntire( const std::string &elem, const std::string &s,
- const FileInfo *p )
-{
- if (noOptimizations) {
- // we generate the element right now, since during debugging
- // we want to be very responsive
- if (s.size())
- output(generateString(elem, s, p));
-
- return;
- }
-
- // otherwise we optmize output generation: delay formatting a specific
- // element until we deal with another element; this way strings that belong
- // to the same element are formatted using only one tag: e.g.,
- // <comment>/* mycomment */</comment>
- // instead of
- // <comment>/*</comment><comment>mycomment</comment><comment>*/</comment>
- if (elem == current_elem) {
- elem_buffer << s;
- } else {
- // first format the buffered string
- const string toformat = elem_buffer.str();
- if (toformat.size())
- output(generateString(current_elem, toformat, p));
-
- // then start a new buffer
- elem_buffer.str("");
- elem_buffer << s;
- current_elem = elem;
- current_file_info = p;
- }
-}
-
-void
-GeneratorMap::generateNL( const std::string &text )
-{
- // first format the buffered string
- flush();
-
- string preformat_text = preformatter->preformat(text);
-
- if (preformat_text == text)
- preformat_text = "\n";
-
- outputbuffer->output_ln(preformat_text);
-}
-
-void
-GeneratorMap::flush()
-{
- const string &remainder = elem_buffer.str();
- if (remainder.size()) {
- output(generateString(current_elem, remainder, current_file_info));
- elem_buffer.str("");
- current_elem = "";
- // each line is handled separately
- }
-}
-
-void
-GeneratorMap::output(const string &s)
-{
- outputbuffer->output(s);
-}
diff --git a/src/lib/generatormap.h b/src/lib/generatormap.h
deleted file mode 100644
index b6fa977..0000000
--- a/src/lib/generatormap.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//
-// C++ Interface: generatormap
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#ifndef GENERATORMAP_H
-#define GENERATORMAP_H
-
-#include <map>
-#include <string>
-#include <set>
-#include "my_sstream.h"
-
-class TextGenerator;
-class PreFormatter;
-class FileInfo;
-
-/**
-map of generators; associate a generator for each program element, e.g., keyword, string, etc.
-
-@author Lorenzo Bettini
-*/
-class GeneratorMap
-{
- protected:
- typedef std::map<std::string, TextGenerator *> MapType;
- typedef std::set<std::string> NoRefType;
- MapType generatormap;
- /// those elements for which no reference info is generated
- NoRefType noreferences;
- TextGenerator *default_generator;
- PreFormatter *preformatter;
- /// where we buffer strings for the current elem
- std::ostringstream elem_buffer;
- /// the element that is currently buffered
- std::string current_elem;
- /// concerns the element currently buffered
- const FileInfo *current_file_info;
- /// whether to turn off optimizazionts (such as buffering), default: false
- bool noOptimizations;
-
- virtual const std::string generateString(const std::string &elem,
- const std::string &s, const FileInfo *);
- void output(const std::string &s);
-
- public:
- GeneratorMap(PreFormatter *);
- virtual ~GeneratorMap();
-
- /**
- * Returns the generator for the specific element name or null if
- * there's no generator for the element
- * @param elem
- * @return
- */
- TextGenerator *hasGenerator(const std::string &elem);
-
- /**
- * Retrieves the generator for a specific element; if it doesn't find it,
- * it creates a generator for that element, using the default generator
- * (i.e., the one for "normal" element)
- * @param elem
- * @return
- */
- TextGenerator *getGenerator(const std::string &elem);
- void addGenerator(const std::string &elem, TextGenerator *gen);
- void addNoReference(const std::string &elem);
- bool isNoReference(const std::string &elem) const;
-
- void setDefaultGenerator(TextGenerator *g);
-
- void generateEntire( const std::string &elem,
- const std::string &s, const FileInfo * );
- void generateNL( const std::string &s );
- void flush();
-
- void setNoOptimizations(bool n) { noOptimizations = n; }
-};
-
-#endif
diff --git a/src/lib/keys.h b/src/lib/keys.h
index 9fff74b..37332e7 100644
--- a/src/lib/keys.h
+++ b/src/lib/keys.h
@@ -2,16 +2,5 @@
#define KEYS_H
#define NORMAL "normal"
-#define KEYWORD "keyword"
-#define TYPE "type"
-#define STRING "string"
-#define COMMENT "comment"
-#define NUMBER "number"
-#define PREPROC "preproc"
-#define SYMBOL "symbol"
-#define FUNCTION "function"
-#define CBRACKET "cbracket"
-#define LINENO "lineno"
-#define GLOBAL "global"
#endif
diff --git a/src/lib/langdefparser.yy b/src/lib/langdefparser.yy
index 9ed5d8f..b524c5c 100644
--- a/src/lib/langdefparser.yy
+++ b/src/lib/langdefparser.yy
@@ -1,6 +1,6 @@
%{
/*
- * Copyright (C) 1999-2005 Lorenzo Bettini <http://www.lorenzobettini.it>
+ * Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,8 +24,6 @@
#include <iostream>
#include <string>
-#include "my_sstream.h"
-
#include "messages.h"
#include "stringdef.h"
#include "statelangelem.h"
@@ -36,6 +34,7 @@
#include "langdefparserfun.h"
#include "langdefscanner.h"
#include "vardefinitions.h"
+#include "namedsubexpslangelem.h"
using std::cerr;
using std::string;
@@ -64,6 +63,11 @@ struct Key : public ParserInfo
~Key() { delete key; }
};
+// this is a trick since ElementNames is a typedef and cannot
+// be used in the union below
+struct ElementNamesList : ElementNames {
+};
+
%}
%union {
@@ -76,12 +80,15 @@ struct Key : public ParserInfo
class StateLangElem *statelangelem;
class StateStartLangElem *statestartlangelem;
class LangElems *langelems;
+ class NamedSubExpsLangElem *namedsubexpslangelem;
struct Key *key;
+ class ElementNamesList *keys;
int flag ;
};
%token <tok> BEGIN_T END_T ENVIRONMENT_T STATE_T MULTILINE_T DELIM_T START_T ESCAPE_T NESTED_T EXIT_ALL EXIT_T VARDEF_T REDEF_T SUBST_T NONSENSITIVE_T
-%token <string> KEY STRINGDEF VARUSE
+%token <string> KEY STRINGDEF REGEXPNOPREPROC VARUSE
+%token <stringdef> REGEXPDEF
%type <stringdef> stringdef escapedef
%type <stringdefs> stringdefs
@@ -91,16 +98,18 @@ struct Key : public ParserInfo
%type <booloption> multiline startnewenv nested nonsensitive
%type <tok> exitall redefsubst
%type <key> key;
+%type <keys> keys;
%%
-allelements :
- {
- /* no definitions */
- /* synthetize a normal elem that catches everything */
- current_lang_elems = new LangElems;
+allelements :
+ {
+ /* no definitions (i.e., an empty .lang file with no definitions, */
+ /* such as default.lang) */
+ /* synthesize a normal elem that catches everything */
+ current_lang_elems = new LangElems;
StringDefs *defs = new StringDefs;
- defs->push_back (new StringDef("(.*)"));
+ defs->push_back (new StringDef("(?:.*)"));
current_lang_elems->add(new StringListLangElem("normal", defs, false));
}
| elemdefs { current_lang_elems = $1; }
@@ -155,6 +164,11 @@ complexelem : key DELIM_T stringdef stringdef escapedef multiline nested
$$->setParserInfo($1);
delete $1;
}
+ | '(' keys ')' '=' REGEXPNOPREPROC {
+ $$ = new NamedSubExpsLangElem($2, new StringDef(*$5));
+ $$->setParserInfo(parsestruct->file_name, @1.first_line);
+ delete $5;
+ }
;
key: KEY {
@@ -164,6 +178,20 @@ key: KEY {
}
;
+keys: keys ',' KEY
+ {
+ $$ = $1;
+ $$->push_back(*$3);
+ delete $3;
+ }
+ | KEY
+ {
+ $$ = new ElementNamesList;
+ $$->push_back(*$1);
+ delete $1;
+ }
+;
+
escapedef : ESCAPE_T stringdef { $$ = $2; }
| { $$ = 0; }
;
@@ -200,7 +228,14 @@ stringdefs : stringdefs ',' stringdef { $$ = $1; $$->push_back($3); }
$$->push_back($1); }
;
-stringdef : STRINGDEF {
+stringdef : REGEXPDEF {
+ $$ = $1;
+ }
+ | STRINGDEF {
+ $$ = new StringDef(*$1);
+ delete $1;
+ }
+ | REGEXPNOPREPROC {
$$ = new StringDef(*$1);
delete $1;
}
@@ -220,13 +255,12 @@ stringdef : STRINGDEF {
%%
+extern int langdef_lex_destroy (void);
+
void
yyerror( const char *s )
{
- ostringstream str ;
- str << parsestruct->file_name << ":" << parsestruct->line << ": " << s; // << ", in option declaration";
- printError( str.str(), cerr ) ;
- exit(EXIT_FAILURE);
+ exitError(s, parsestruct);
}
void
@@ -245,6 +279,10 @@ parse_lang_def()
delete vardefinitions;
parsestruct = 0;
vardefinitions = 0;
+
+ // release scanner memory
+ langdef_lex_destroy ();
+
return current_lang_elems;
}
@@ -259,6 +297,10 @@ parse_lang_def(const char *path, const char *name)
delete vardefinitions;
parsestruct = 0;
vardefinitions = 0;
+
+ // release scanner memory
+ langdef_lex_destroy ();
+
return current_lang_elems;
}
diff --git a/src/lib/langdefscanner.ll b/src/lib/langdefscanner.ll
index 6104bab..e9ff1de 100644
--- a/src/lib/langdefscanner.ll
+++ b/src/lib/langdefscanner.ll
@@ -1,6 +1,6 @@
%{
/*
- * Copyright (C) 1999-2005, Lorenzo Bettini, http://www.lorenzobettini.it
+ * Copyright (C) 1999-2007, Lorenzo Bettini, http://www.lorenzobettini.it
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,6 +22,9 @@
#include "langdefparser.h"
#include "langdefscanner.h"
#include "fileutil.h"
+#include "regexpreprocessor.h"
+#include "stringdef.h"
+#include "messages.h"
#include <stack>
@@ -40,6 +43,7 @@ static std::ostringstream buff;
static void buffer(const char *s);
static void buffer_escape(const char *c);
static const std::string *flush_buffer();
+static StringDef *flush_buffer_preproc();
static void open_include_file(const char *file);
static void close_include_file();
@@ -69,11 +73,11 @@ IDE [a-zA-Z_]([a-zA-Z0-9_])*
STRING \"[^\n"]+\"
-%s COMMENT_STATE STRING_STATE REGEXP_STATE INCLUDE_STATE
+%s COMMENT_STATE STRING_STATE REGEXP_STATE REGEXP_NOPREPROC_STATE INCLUDE_STATE
%%
-[ \t] {}
+<INITIAL,COMMENT_STATE,INCLUDE_STATE>[ \t] {}
\r {}
@@ -130,21 +134,38 @@ STRING \"[^\n"]+\"
<INITIAL>"=" { return '=' ; }
<INITIAL>"," { return ',' ; }
<INITIAL>"+" { return '+' ; }
+<INITIAL>"(" { updateTokenInfo(); return '(' ; }
+<INITIAL>")" { return ')' ; }
<INITIAL>\" { BEGIN(STRING_STATE) ; }
<STRING_STATE>("*"|"."|"?"|"+"|"("|")"|"{"|"}"|"["|"]"|"^"|"$") { buffer_escape( yytext ) ; }
<STRING_STATE>\\\| { buffer( yytext ) ; }
<STRING_STATE>\\\\ { buffer( yytext ) ; }
+<STRING_STATE>\\[[:digit:]] {
+ printError(parsestruct->file_name, parsestruct->line, "backreferences are not allowed") ;
+ exitError(parsestruct->file_name, parsestruct->line, "use backreferences only inside ` `") ;
+ }
<STRING_STATE>"\\\"" { buffer( yytext ) ; }
<STRING_STATE>\" { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("STRINGDEF",langdef_lval.string); return STRINGDEF; }
<STRING_STATE>[^\n] { buffer( yytext ) ; }
<INITIAL>\' { BEGIN(REGEXP_STATE) ; }
<REGEXP_STATE>\\\\ { buffer( yytext ) ; }
+<REGEXP_STATE>\\[[:digit:]] {
+ printError(parsestruct->file_name, parsestruct->line, "backreferences are not allowed") ;
+ exitError(parsestruct->file_name, parsestruct->line, "use backreferences only inside ` `") ;
+ }
<REGEXP_STATE>"\\'" { buffer( "'" ) ; }
-<REGEXP_STATE>\' { BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("STRINGDEF",langdef_lval.string); return STRINGDEF; }
+<REGEXP_STATE>\' { BEGIN(INITIAL) ; langdef_lval.stringdef = flush_buffer_preproc() ; DEB2("REGEXPDEF",langdef_lval.string); return REGEXPDEF; }
<REGEXP_STATE>[^\n] { buffer( yytext ) ; }
+<INITIAL>` { /* opening backtick: start collecting a backtick-delimited regexp */ BEGIN(REGEXP_NOPREPROC_STATE) ; }
+<REGEXP_NOPREPROC_STATE>\\\\ { /* escaped backslash is kept verbatim */ buffer( yytext ) ; }
+<REGEXP_NOPREPROC_STATE>"\\`" { /* escaped delimiter: store a literal backtick (was wrongly stored as a single quote) */ buffer( "`" ) ; }
+<REGEXP_NOPREPROC_STATE>` { /* closing backtick: hand the buffered text to the parser via flush_buffer() */ BEGIN(INITIAL) ; langdef_lval.string = flush_buffer() ; DEB2("REGEXPNOPREPROCDEF",langdef_lval.string); return REGEXPNOPREPROC; }
+<REGEXP_NOPREPROC_STATE>[^\n] { /* any other character on the line is buffered as is */ buffer( yytext ) ; }
+
+
<INITIAL>{nl} { DEB("NEWLINE"); ++(parsestruct->line) ; }
<INITIAL>. { return yytext[0] ; }
@@ -168,6 +189,13 @@ const std::string *flush_buffer()
return ret;
}
+StringDef *flush_buffer_preproc()
+{
+ StringDef *sd = new StringDef(RegexPreProcessor::preprocess(buff.str()), buff.str());
+ buff.str("");
+ return sd;
+}
+
void _open_file_to_scan(const string &path, const string &name)
{
langdef_in = open_data_file_stream(path, name);
diff --git a/src/lib/langelem.cpp b/src/lib/langelem.cpp
deleted file mode 100644
index f3b53af..0000000
--- a/src/lib/langelem.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//
-// C++ Implementation: %{MODULE}
-//
-// Description:
-//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "langelem.h"
-
-using namespace std;
-
-LangElem::LangElem(const string &n) : name(n), redef(false), subst(false)
-{
-}
-
-
-LangElem::~LangElem()
-{
-}
-
-const std::string
-LangElem::toString() const
-{
- return name;
-}
-
diff --git a/src/lib/langelem.h b/src/lib/langelem.h
index 6bf51c8..fcf0f5a 100644
--- a/src/lib/langelem.h
+++ b/src/lib/langelem.h
@@ -33,12 +33,19 @@ class LangElem : public ParserInfo
bool subst; // whether this substitutes an existing language element
public:
- LangElem(const std::string &n);
+ LangElem(const std::string &n) : name(n), redef(false), subst(false) {}
- virtual ~LangElem();
+ virtual ~LangElem() {}
const std::string getName() const { return name; }
- virtual const std::string toString() const;
+ virtual const std::string toString() const { return name; }
+
+ /**
+ * return the original string representation of this element;
+ * this must be defined by subclasses
+ */
+ virtual const std::string toStringOriginal() const = 0;
+
bool isRedef() const { return redef; }
void setRedef() { redef = true; }
bool isSubst() const { return subst; }
diff --git a/src/lib/langelems.cpp b/src/lib/langelems.cpp
index efcd49c..3788d8c 100644
--- a/src/lib/langelems.cpp
+++ b/src/lib/langelems.cpp
@@ -33,6 +33,12 @@ LangElems::toString() const
return toStringCollection<LangElems>(this, '\n');
}
+const string
+LangElems::toStringOriginal() const
+{
+ return toStringOriginalCollection<LangElems>(this, '\n');
+}
+
void LangElems::add(LangElem *el)
{
push_back(el);
diff --git a/src/lib/langelems.h b/src/lib/langelems.h
index d8eb321..ff8548f 100644
--- a/src/lib/langelems.h
+++ b/src/lib/langelems.h
@@ -59,6 +59,7 @@ class LangElems : protected list<LangElem *>
void subst(LangElem *el);
const std::string toString() const;
+ const std::string toStringOriginal() const;
// doublecpp: dispatch methods, DO NOT MODIFY
public:
virtual void dispatch_build(RegExpStateBuilder *, RegExpStatePointer state);
diff --git a/src/lib/languageinfer.cpp b/src/lib/languageinfer.cpp
index 4e5962c..4eca066 100644
--- a/src/lib/languageinfer.cpp
+++ b/src/lib/languageinfer.cpp
@@ -43,6 +43,10 @@ const string LanguageInfer::infer( istream &stream )
boost::regex langRegEx("#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*([[:alnum:]]+)");
// the regular expression for finding the language specification in a script file
+ // when the interpreter is run through env, such as #! /usr/bin/env perl
+ boost::regex langEnvRegEx("#[[:blank:]]*![[:blank:]]*(?:[\\./]*)(?:[[:alnum:]]+[\\./]+)*(?:env)[[:blank:]]+([[:alnum:]]+)");
+
+ // the regular expression for finding the language specification in a script file
// according to Emacs convention: # -*- language -*-
boost::regex langRegExEmacs("-\\*-[[:blank:]]*([[:alnum:]]+).*-\\*-");
@@ -61,6 +65,7 @@ const string LanguageInfer::infer( istream &stream )
read_line(&stream, secondLine);
boost::match_results<std::string::const_iterator> what;
+ boost::match_results<std::string::const_iterator> whatEnv;
boost::match_results<std::string::const_iterator> whatEamcs;
// first try the emacs specification
@@ -77,12 +82,19 @@ const string LanguageInfer::infer( istream &stream )
return whatEamcs[1];
}
- // try the sha-bang specification
+ // try also the env specification
+ boost::regex_search(firstLine,
+ whatEnv, langEnvRegEx, boost::match_default);
+
+ if (whatEnv[1].matched)
+ return whatEnv[1];
+
+ // finally try the sha-bang specification
boost::regex_search(firstLine,
what, langRegEx, boost::match_default);
if (what[1].matched)
return what[1];
-
+
return "";
}
diff --git a/src/lib/linebuffer.cpp b/src/lib/linebuffer.cpp
deleted file mode 100644
index 66d3b4b..0000000
--- a/src/lib/linebuffer.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-//
-// C++ Implementation: linebuffer
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2005
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "linebuffer.h"
-
-LineBuffer::LineBuffer()
-{
-}
-
-
-LineBuffer::~LineBuffer()
-{
-}
-
-bool LineBuffer::empty() const
-{
- return (buffer.str().size() == 0 && post.size() == 0);
-}
diff --git a/src/lib/linebuffer.h b/src/lib/linebuffer.h
index f55115b..e4d64d0 100644
--- a/src/lib/linebuffer.h
+++ b/src/lib/linebuffer.h
@@ -33,8 +33,8 @@ class LineBuffer
PostContents post; // to be generated after the line
public:
- LineBuffer();
- ~LineBuffer();
+ LineBuffer() {}
+ ~LineBuffer() {}
void output(const std::string &s) { buffer << s; }
void output_post(const std::string &s) { post.insert(s); }
@@ -42,7 +42,7 @@ class LineBuffer
const std::string getContents() const { return buffer.str(); }
const PostContents &getPostContents() const { return post; }
- bool empty() const;
+ bool empty() const { return (buffer.str().size() == 0 && post.size() == 0); }
};
typedef boost::shared_ptr<LineBuffer> LineBufferPtr;
diff --git a/src/lib/lineoutputgenerator.cpp b/src/lib/lineoutputgenerator.cpp
deleted file mode 100644
index bcdb3e0..0000000
--- a/src/lib/lineoutputgenerator.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//
-// C++ Implementation: lineoutputgenerator
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "lineoutputgenerator.h"
-
-#include <iomanip>
-#include "linenumdigit.h"
-#include "my_sstream.h"
-#include "maingeneratormap.h"
-#include "textgenerator.h"
-#include "textstyle.h"
-
-using namespace std;
-
-static SubstitutionMapping substitutionmapping;
-
-LineOutputGenerator::LineOutputGenerator(ostream& os,
- TextStyle *anch, bool genref, const string &prefix,
- const std::string &line_pref):
- OutputGenerator(os, line_pref), generate_ref(genref && ! anch->empty()),
- anchor(anch), anchor_line_prefix(prefix), line_num(1)
-{
- line_num_generator = generatormap->getGenerator("linenum");
-}
-
-
-LineOutputGenerator::~LineOutputGenerator()
-{
-}
-
-
-void LineOutputGenerator::generate_line(const string &line)
-{
- generate_line_info();
- ++line_num;
- OutputGenerator::generate_line(line);
-}
-
-void LineOutputGenerator::reset()
-{
- OutputGenerator::reset();
- line_num = 1;
-}
-
-void
-LineOutputGenerator::generate_line_info()
-{
- ostringstream sout;
-
- sout << std::setw (line_num_digit) << std::setfill ('0')
- << line_num << ":";
-
- string line_str = line_num_generator->generateEntire(sout.str().c_str());
-
- if (generate_ref) {
- ostringstream line_n;
- line_n << anchor_line_prefix << line_num;
- sout.str("");
- substitutionmapping["$text"] = line_str;
- substitutionmapping["$linenum"] = line_n.str();
- sout << anchor->output(substitutionmapping);
- line_str = sout.str();
- }
-
- output_string(line_str + " ");
-}
diff --git a/src/lib/lineoutputgenerator.h b/src/lib/lineoutputgenerator.h
deleted file mode 100644
index 201abc1..0000000
--- a/src/lib/lineoutputgenerator.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//
-// C++ Interface: lineoutputgenerator
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#ifndef LINEOUTPUTGENERATOR_H
-#define LINEOUTPUTGENERATOR_H
-
-#include <outputgenerator.h>
-
-class TextGenerator;
-class TextStyle;
-
-/**
-a specialized output generator that also generates the line number before each line
-
-@author Lorenzo Bettini
-*/
-class LineOutputGenerator : public OutputGenerator
-{
-private:
- bool generate_ref;
- TextStyle *anchor;
- std::string anchor_line_prefix;
- unsigned int line_num;
- TextGenerator *line_num_generator;
-
-public:
- LineOutputGenerator(std::ostream& os,
- TextStyle *anch, bool genref, const std::string &prefix,
- const std::string &line_pref);
-
- ~LineOutputGenerator();
-
-protected:
- virtual void generate_line(const std::string &line);
- virtual void reset();
-
- virtual void generate_line_info();
-};
-
-#endif
diff --git a/src/lib/maingeneratormap.cpp b/src/lib/maingeneratormap.cpp
deleted file mode 100644
index 8ebb379..0000000
--- a/src/lib/maingeneratormap.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//
-// C++ Implementation: maingeneratormap
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "maingeneratormap.h"
-
-GeneratorMap *generatormap;
diff --git a/src/lib/maingeneratormap.h b/src/lib/maingeneratormap.h
deleted file mode 100644
index f356940..0000000
--- a/src/lib/maingeneratormap.h
+++ /dev/null
@@ -1,19 +0,0 @@
-//
-// C++ Interface: maingeneratormap
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#ifndef MAINGENERATORMAP_H
-#define MAINGENERATORMAP_H
-
-#include "generatormap.h"
-
-extern GeneratorMap *generatormap;
-
-#endif
diff --git a/src/lib/messages.cc b/src/lib/messages.cc
index b5439af..32c1df6 100644
--- a/src/lib/messages.cc
+++ b/src/lib/messages.cc
@@ -1,5 +1,5 @@
/*
-** Copyright (C) 1999, 2000, Lorenzo Bettini <http://www.lorenzobettini.it>
+** Copyright (C) 1999-2007, Lorenzo Bettini <http://www.lorenzobettini.it>
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
@@ -27,6 +27,8 @@
#include <stdlib.h>
#include "messages.h"
+#include "parserinfo.h"
+#include "parsestruct.h"
static Messages *_messager = 0 ;
@@ -65,9 +67,16 @@ void printError( const std::string &s, ostream &stream ) {
stream << s << endl;
}
+void printError( const std::string &s, const ParserInfo *pinfo, ostream &stream ) {
+ printError(pinfo->filename, pinfo->line, s, stream);
+}
+
void printError(const std::string &filename, unsigned int line, const std::string &error, ostream & stream)
{
- stream << filename << ":" << line << ": " << error << endl;
+ stream << filename << ":";
+ if (line)
+ stream << line << ": ";
+ stream << error << endl;
}
void setMessager( Messages *m ) {
@@ -86,9 +95,29 @@ void exitError(const std::string &error)
exit(EXIT_FAILURE);
}
+void exitError(const std::string &error, const ParserInfo *pinfo)
+{
+ exitError(pinfo->filename, pinfo->line, error);
+}
+
+void exitError(const std::string &error, const ParseStruct *pinfo)
+{
+ exitError((pinfo->path.size() ? pinfo->path + "/" : "") + pinfo->file_name, pinfo->line, error);
+}
+
+void exitError(const std::string &filename, unsigned int line, const std::string &error)
+{
+ cerr << PACKAGE << ": ";
+ printError(filename, line, error);
+ exit(EXIT_FAILURE);
+}
+
void foundBug(const std::string &error, const std::string &file, int line)
{
cerr << PACKAGE << ": " << error << ", " << file << ":" << line << endl;
+ cerr << PACKAGE << ": " << "it looks like you found a bug of this program" << endl;
+ cerr << PACKAGE << ": " << "could you please send this output and the input file" << endl;
+ cerr << PACKAGE << ": " << "to the author of this program?" << endl;
exit(EXIT_FAILURE);
}
diff --git a/src/lib/messages.h b/src/lib/messages.h
index 833aa72..4fb5f8b 100644
--- a/src/lib/messages.h
+++ b/src/lib/messages.h
@@ -11,6 +11,9 @@ using std::ostream;
using std::cerr;
using std::endl;
+class ParserInfo;
+class ParseStruct;
+
class Messages {
public:
/// whether to print anything
@@ -66,9 +69,13 @@ void printMessage_noln( const std::string &s, ostream &stream = cerr ) ;
void printWarning( const char *s, ostream &stream = cerr ) ;
void printError( const char *s, ostream &stream = cerr ) ;
void printError( const std::string &s, ostream &stream = cerr ) ;
+void printError( const std::string &s, const ParserInfo *pinfo, ostream &stream = cerr ) ;
void printError(const std::string &filename, unsigned int line, const std::string &error, ostream & stream = cerr );
void memory_exhausted();
void exitError(const std::string &error);
+void exitError(const std::string &error, const ParserInfo *pinfo);
+void exitError(const std::string &error, const ParseStruct *pinfo);
+void exitError(const std::string &filename, unsigned int line, const std::string &error);
void foundBug(const std::string &error, const std::string &file, int line);
bool shouldPrint();
diff --git a/src/lib/my_set.h b/src/lib/my_set.h
deleted file mode 100644
index c7cce53..0000000
--- a/src/lib/my_set.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// deal with namespace problems
-
-#ifndef _MY_SET_H
-#define _MY_SET_H
-
-#include <set>
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#ifdef HAVE_NAMESPACES
-using std::set;
-#endif
-
-#endif // _MY_SET_H
diff --git a/src/lib/my_string.h b/src/lib/my_string.h
deleted file mode 100644
index e22d2c7..0000000
--- a/src/lib/my_string.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// deal with namespace problems
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif // HAVE_CONFIG_H
-
-#include <string>
-
-#ifdef HAVE_NAMESPACES
-using std::string;
-#endif
diff --git a/src/lib/namedsubexpslangelem.cpp b/src/lib/namedsubexpslangelem.cpp
new file mode 100644
index 0000000..c3a4201
--- /dev/null
+++ b/src/lib/namedsubexpslangelem.cpp
@@ -0,0 +1,47 @@
+//
+// C++ Implementation: NamedSubExpsLangElem
+//
+// Description: represents a regular expression made by many marked groups
+// and each marked group represents a different language element
+//
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+
+#include "namedsubexpslangelem.h"
+#include "stringdef.h"
+#include "tostringcollection.h"
+
+using namespace std;
+
+/**
+ * Builds the element from a list of element names and a regular expression
+ * definition. Both pointers are stored as given and are deleted by the
+ * destructor of this class.
+ *
+ * @param names the element names
+ * @param def the regular expression definition
+ * @param ex forwarded unchanged to the StateStartLangElem constructor
+ * @param al forwarded unchanged to the StateStartLangElem constructor
+ */
+NamedSubExpsLangElem::NamedSubExpsLangElem(const ElementNames *names, StringDef *def,
+ bool ex, bool al) :
+ StateStartLangElem("named subexps", ex, al), // "named subexps" is a bogus name
+ elementNames(names), regexpDef(def)
+{
+}
+
+/// Releases the element name list and the regexp definition held by this object.
+NamedSubExpsLangElem::~NamedSubExpsLangElem() {
+ // deleting a null pointer is a no-op, so no explicit checks are needed
+ delete elementNames;
+ delete regexpDef;
+}
+
+/**
+ * Returns the base-class description, a single space, the element names
+ * rendered by collectionToString with ',' as second argument, and finally
+ * the regexp definition's toString() (appended with no separator).
+ */
+const std::string
+NamedSubExpsLangElem::toString() const
+{
+ string description = StateStartLangElem::toString();
+ description += " ";
+ description += collectionToString(elementNames, ',');
+ description += regexpDef->toString();
+ return description;
+}
+
+/**
+ * Same layout as toString(), but the regexp definition is rendered with
+ * its toStringOriginal().
+ *
+ * NOTE(review): the prefix still comes from StateStartLangElem::toString();
+ * confirm that this is intended rather than an "original" variant of it.
+ */
+const std::string
+NamedSubExpsLangElem::toStringOriginal() const
+{
+ string description = StateStartLangElem::toString();
+ description += " ";
+ description += collectionToString(elementNames, ',');
+ description += regexpDef->toStringOriginal();
+ return description;
+}
diff --git a/src/lib/namedsubexpslangelem.h b/src/lib/namedsubexpslangelem.h
new file mode 100644
index 0000000..43126a4
--- /dev/null
+++ b/src/lib/namedsubexpslangelem.h
@@ -0,0 +1,50 @@
+//
+// C++ Interface: NamedSubExpsLangElem
+//
+// Description: represents a regular expression made by many marked groups
+// and each marked group represents a different language element
+//
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+
+#ifndef NAMEDSUBEXPSLANGELEM_H_
+#define NAMEDSUBEXPSLANGELEM_H_
+
+#include "statestartlangelem.h"
+
+#include <list>
+
+class StringDef;
+
+typedef std::list<std::string> ElementNames;
+
+// doublecpp: forward declarations, DO NOT MODIFY
+class RegExpStateBuilder; // file: regexpstatebuilder.h
+class RegExpStatePointer; // file: regexpstatebuilder.h
+// doublecpp: end, DO NOT MODIFY
+
+/**
+ * A language element described by a single regular expression made of
+ * many marked groups, where each marked group represents a different
+ * language element.
+ *
+ * The object deletes both pointers it is constructed with in its destructor.
+ * NOTE(review): no copy constructor / assignment operator is declared here,
+ * so a copy would delete the same pointers twice - confirm that instances
+ * are never copied.
+ */
+class NamedSubExpsLangElem : public StateStartLangElem
+{
+ /// the element names (deleted by the destructor)
+ const ElementNames *elementNames;
+ /// the regular expression definition (deleted by the destructor)
+ StringDef *regexpDef;
+public:
+ /**
+ * @param names the element names
+ * @param def the regular expression definition
+ * @param exit forwarded to StateStartLangElem (presumably "exit the state" - TODO confirm)
+ * @param all forwarded to StateStartLangElem (presumably "exit all states" - TODO confirm)
+ */
+ NamedSubExpsLangElem(const ElementNames *names, StringDef *def, bool exit = false, bool all = false);
+ virtual ~NamedSubExpsLangElem();
+
+ /// string representation built from the base class, the names and the regexp
+ virtual const std::string toString() const;
+
+ /// like toString(), but uses the regexp definition's toStringOriginal()
+ virtual const std::string toStringOriginal() const;
+
+ /// the stored element names; the pointer remains owned by this object
+ const ElementNames *getElementNames() const { return elementNames; }
+ /// the stored regexp definition; the pointer remains owned by this object
+ const StringDef *getRegexpDef() const { return regexpDef; }
+
+// doublecpp: dispatch methods, DO NOT MODIFY
+public:
+virtual void dispatch_build(RegExpStateBuilder *, RegExpStatePointer state);
+// doublecpp: end, DO NOT MODIFY
+};
+
+#endif /*NAMEDSUBEXPSLANGELEM_H_*/
diff --git a/src/lib/outlangdefparser.yy b/src/lib/outlangdefparser.yy
index 4c8a28c..8caf551 100644
--- a/src/lib/outlangdefparser.yy
+++ b/src/lib/outlangdefparser.yy
@@ -1,6 +1,6 @@
%{
/*
- * Copyright (C) 1999-2005 Lorenzo Bettini <http://www.lorenzobettini.it>
+ * Copyright (C) 1999-2007 Lorenzo Bettini <http://www.lorenzobettini.it>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,8 +24,6 @@
#include <iostream>
#include <string>
-#include "my_sstream.h"
-
#include "messages.h"
#include "parsestruct.h"
#include "outlangdefscanner.h"
@@ -235,22 +233,13 @@ translation : REGEXDEF REGEXDEF
%%
-void
-yyerror( const char *s )
-{
- ostringstream str ;
- str << outlang_parsestruct->file_name << ":" << outlang_parsestruct->line << ": " << s; // << ", in option declaration";
- printError( str.str(), cerr ) ;
- exit(EXIT_FAILURE);
-}
+extern int outlangdef_lex_destroy (void);
-/*
void
-yyerror( const string &s )
+yyerror( const char *s )
{
- yyerror(s.c_str());
+ exitError(s, outlang_parsestruct);
}
-*/
TextStylesPtr
parse_outlang_def()
@@ -262,6 +251,10 @@ parse_outlang_def()
outlangdef_parse();
delete outlang_parsestruct;
outlang_parsestruct = 0;
+
+ // release scanner memory
+ outlangdef_lex_destroy ();
+
return textstyles;
}
@@ -276,6 +269,10 @@ parse_outlang_def(const char *path, const char *name)
outlangdef_parse();
delete outlang_parsestruct;
outlang_parsestruct = 0;
+
+ // release scanner memory
+ outlangdef_lex_destroy ();
+
return textstyles;
}
@@ -292,6 +289,10 @@ parse_outlang_def_file(const char *path, const char *name)
outlangdef_parse();
delete outlang_parsestruct;
outlang_parsestruct = 0;
+
+ // release scanner memory
+ outlangdef_lex_destroy ();
+
return textstyles;
}
diff --git a/src/lib/outputgenerator.cpp b/src/lib/outputgenerator.cpp
index c43fccf..abeb06c 100644
--- a/src/lib/outputgenerator.cpp
+++ b/src/lib/outputgenerator.cpp
@@ -4,59 +4,95 @@
// Description:
//
//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007
//
// Copyright: See COPYING file that comes with this distribution
//
//
#include "outputgenerator.h"
+#include <iomanip>
+#include "linenumdigit.h"
+#include "my_sstream.h"
+#include "textgenerator.h"
+#include "textstyle.h"
+
using namespace std;
+/// used for line information generation
+static SubstitutionMapping substitutionmapping;
+
OutputGenerator::OutputGenerator(ostream &os, const std::string &pref) :
- output(os), line_prefix(pref), alwaysFlush(false)
-{
+ output(os), line_prefix(pref), alwaysFlush(false), generateLineInfo(false),
+ line_num(1) {
+}
+
+OutputGenerator::OutputGenerator(ostream& os, TextGenerator *linegen,
+ TextStyle *anch, bool genref, const string &prefix,
+ const std::string &line_pref) :
+ output(os), line_prefix(line_pref), alwaysFlush(false), generateLineInfo(true),
+ generate_ref(genref && ! anch->empty()), anchor(anch), anchor_line_prefix(prefix), line_num(1),
+ line_num_generator(linegen) {
}
-OutputGenerator::~OutputGenerator()
-{
+OutputGenerator::~OutputGenerator() {
}
-void
-OutputGenerator::output_string(const string &s)
-{
+void OutputGenerator::output_string(const string &s) {
output << s;
if (alwaysFlush)
- flush();
+ flush();
}
-void
-OutputGenerator::outputLine(const string &line)
-{
- if (line_prefix.size())
- output_string(line_prefix);
+void OutputGenerator::outputLine(const string &line) {
+ if (line_prefix.size())
+ output_string(line_prefix);
- output_string(line);
+ output_string(line);
}
-void
-OutputGenerator::generateLine(const string &line)
-{
- if (line_prefix.size())
- output_string(line_prefix);
+void OutputGenerator::generateLine(const string &line) {
+ if (line_prefix.size())
+ output_string(line_prefix);
+
+ generate_line(line);
+}
+
+void OutputGenerator::generate_line(const string &line) {
+ if (generateLineInfo)
+ generate_line_info();
+
+ ++line_num;
- generate_line(line);
+ output_string(line);
}
-void
-OutputGenerator::generate_line(const string &line)
-{
- output_string(line);
+void OutputGenerator::flush() {
+ output << std::flush;
}
-void
-OutputGenerator::flush()
-{
- output << std::flush;
+void OutputGenerator::reset() {
+ line_num = 1;
}
+
+void OutputGenerator::generate_line_info() {
+ ostringstream sout;
+
+ sout << std::setw(line_num_digit)<< std::setfill('0')<< line_num << ":";
+
+ string line_str = line_num_generator->generateEntire(sout.str().c_str());
+
+ if (generate_ref) {
+ ostringstream line_n;
+ line_n << anchor_line_prefix << line_num;
+ sout.str("");
+ substitutionmapping["$text"] = line_str;
+ substitutionmapping["$linenum"] = line_n.str();
+ sout << anchor->output(substitutionmapping);
+ line_str = sout.str();
+ }
+
+ output_string(line_str + " ");
+}
+
diff --git a/src/lib/outputgenerator.h b/src/lib/outputgenerator.h
index fd85058..09ee1dc 100644
--- a/src/lib/outputgenerator.h
+++ b/src/lib/outputgenerator.h
@@ -14,6 +14,9 @@
#include <iostream>
+class TextGenerator;
+class TextStyle;
+
/**
base class that actually writes the generated output to the output stream
@@ -28,11 +31,26 @@ class OutputGenerator
std::string line_prefix;
/// whether to flush the stream at each output (default = false)
bool alwaysFlush;
+
+ /// whether to generate line information
+ bool generateLineInfo;
+
+ // for line information
+
+ bool generate_ref;
+ TextStyle *anchor;
+ std::string anchor_line_prefix;
+ unsigned int line_num;
+ TextGenerator *line_num_generator;
public:
OutputGenerator(std::ostream &os, const std::string &pref);
+
+ OutputGenerator(std::ostream& os, TextGenerator *linegen,
+ TextStyle *anch, bool genref, const std::string &prefix,
+ const std::string &line_pref);
- virtual ~OutputGenerator();
+ ~OutputGenerator();
void setAlwaysFlush(bool b) { alwaysFlush = b; }
@@ -61,10 +79,9 @@ class OutputGenerator
void flush();
/**
- * Resets the generator. By default it does nothing, but it can be
- * overidden by subclasses
+ * Resets the generator (i.e., resets line number)
*/
- virtual void reset() {}
+ void reset();
protected:
/**
@@ -73,7 +90,12 @@ class OutputGenerator
*
* @param s
*/
- virtual void generate_line(const std::string &s);
+ void generate_line(const std::string &s);
+
+ /**
+ * Generates line information
+ */
+ void generate_line_info();
};
#endif
diff --git a/src/lib/parserinfo.h b/src/lib/parserinfo.h
index 9d6eee3..e2c54af 100644
--- a/src/lib/parserinfo.h
+++ b/src/lib/parserinfo.h
@@ -24,7 +24,7 @@ struct ParserInfo {
std::string filename; // including path
unsigned int line;
- ParserInfo() {}
+ ParserInfo() : line(0) {}
ParserInfo(const std::string &n) : filename(n) {}
void setParserInfo(const std::string &name, unsigned int l)
diff --git a/src/lib/parsestyles.h b/src/lib/parsestyles.h
index a753917..6de12d1 100644
--- a/src/lib/parsestyles.h
+++ b/src/lib/parsestyles.h
@@ -19,7 +19,7 @@ class GeneratorFactory;
void parseStyles(const std::string &path, const std::string &name,
GeneratorFactory *generatorFactory,
std::string &bodyBgColor) ;
-void parseStyleError(const std::string &error) ;
+void parseStyleError(const std::string &error, bool exit = true) ;
/// for css style files
void parseCssStyles(const std::string &path, const std::string &name,
diff --git a/src/lib/readtags.c b/src/lib/readtags.c
index 8cc0291..8a58827 100644
--- a/src/lib/readtags.c
+++ b/src/lib/readtags.c
@@ -1,5 +1,5 @@
/*
-* $Id: readtags.c,v 1.7 2007年03月23日 18:13:11 bettini Exp $
+* $Id: readtags.c,v 1.8 2007年06月08日 10:11:30 bettini Exp $
*
* Copyright (c) 1996-2003, Darren Hiebert
*
diff --git a/src/lib/readtags.h b/src/lib/readtags.h
index ce32611..2bf2ccf 100644
--- a/src/lib/readtags.h
+++ b/src/lib/readtags.h
@@ -1,5 +1,5 @@
/*
-* $Id: readtags.h,v 1.7 2007年03月23日 18:13:11 bettini Exp $
+* $Id: readtags.h,v 1.8 2007年06月08日 10:11:30 bettini Exp $
*
* Copyright (c) 1996-2003, Darren Hiebert
*
diff --git a/src/lib/refgeneratormap.cpp b/src/lib/refgeneratormap.cpp
deleted file mode 100644
index 8a1e30f..0000000
--- a/src/lib/refgeneratormap.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-//
-// C++ Implementation: refgeneratormap
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2005
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "refgeneratormap.h"
-#include "messages.h"
-#include "fileinfo.h"
-#include "fileutil.h"
-#include "mainoutputbuffer.h"
-#include "preformatter.h"
-
-#include <boost/regex.hpp>
-#include <list>
-
-using namespace std;
-
-bool isTaggable(const string &elem)
-{
- return elem.find_first_of(' ') == string::npos;
-}
-
-RefGeneratorMap::RefGeneratorMap(PreFormatter *pf, const string &_ctags_file_name,
- const TextStyles::RefTextStyle &r, RefPosition pos)
- : GeneratorMap(pf), ctags_file_name(_ctags_file_name),
- refstyle(r), refposition(pos)
-{
- ctags_file = tagsOpen (ctags_file_name.c_str(), &info);
- if (ctags_file == 0)
- {
- exitError("cannot open tag file: " + ctags_file_name);
- }
-}
-
-
-RefGeneratorMap::~RefGeneratorMap()
-{
- tagsClose(ctags_file);
-}
-
-//#define DEBUGREF
-#ifdef DEBUGREF
-#include <iostream>
-#define DEB(x) cerr << x << endl;
-#define DEB2(x) cerr << x ;
-#else
-#define DEB(x) ;
-#define DEB2(x) ;
-#endif
-
-static boost::regex string_or_space_regex("([^[:blank:]]+)|([[:blank:]]+)");
-static SubstitutionMapping ref_substitutionmapping;
-
-#define SPACE 2
-#define NOT_SPACE 1
-
-/*
-* separates a line in block of spaces and block of non spaces.
-* the stringbuffer tokens stores the pieces seen so far for which no
-* entry in the tag file was found.
-*
-* for each block of non spaces tries to look for an entry in the tag.
-* if it finds it flushes the stringbuffer tokens (by passing its contents
-* to the parent class implementation of generateString).
-*
-* For instance (notice the spaces among the +)
-* "myfield + myfield2 + myfield3
-* if only an entry for myfield2 is found, the we will generate
-* 3 blocks:
-* "myfield + "
-* "myfield2"
-* " + myfield3"
-*/
-const std::string RefGeneratorMap::generateString(const std::string& elem, const std::string& s, const FileInfo* fileinfo)
-{
- if (isNoReference(elem))
- return GeneratorMap::generateString(elem, s, fileinfo);
-
- buffer.str("");
- ostringstream tokens;
-
- boost::sregex_iterator i(s.begin(), s.end(), string_or_space_regex);
- boost::sregex_iterator j;
- while (i != j) {
- if ((*i)[SPACE].matched) {
- tokens << string((*i)[SPACE].first, (*i)[SPACE].second);
- } else {
- string not_spaces = string((*i)[NOT_SPACE].first, (*i)[NOT_SPACE].second);
- string found_refs = _generateString(elem, not_spaces, fileinfo);
- if (found_refs.size()) {
- const string &previous = tokens.str();
- if (previous.size()) {
- buffer << GeneratorMap::generateString(elem, previous, fileinfo);
- tokens.str("");
- }
- buffer << found_refs;
- } else {
- tokens << not_spaces;
- }
- }
-
- ++i;
- }
-
- const string &remainder = tokens.str();
- if (remainder.size()) {
- buffer << GeneratorMap::generateString(elem, remainder, fileinfo);
- }
-
- return buffer.str();
-}
-
-struct RefEntry
-{
- string filename;
- unsigned long linenumber;
-};
-
-const string RefGeneratorMap::_generateString(const std::string& elem, const std::string& s, const FileInfo* fileinfo)
-{
- tagEntry entry;
- bool found = false; // whether we found a tag
- bool found_anchor = false; // whether we found an anchor
- string output;
- typedef list<RefEntry> FoundRefList;
- FoundRefList foundreflist;
-
- DEB("inspecting " + s)
-
- if (tagsFind (ctags_file, &entry, s.c_str(), TAG_FULLMATCH) == TagSuccess)
- {
- found = true;
- do
- {
- RefEntry refentry;
- refentry.filename = entry.file;
- if ((refentry.filename == fileinfo->filename ||
- refentry.filename == fileinfo->input_file_name) &&
- entry.address.lineNumber == fileinfo->line) {
- ostringstream gen_info;
- // we just found the reference to this very element
- // so we must generate an anchor
- gen_info << entry.address.lineNumber;
- DEB(" found anchor " + gen_info.str());
- ref_substitutionmapping["$text"] = preformatter->preformat(s);
- ref_substitutionmapping["$infilename"] = strip_file_path(refentry.filename);
- ref_substitutionmapping["$infile"] = refentry.filename;
- ref_substitutionmapping["$linenum"] = gen_info.str();
- output = refstyle.anchor.output(ref_substitutionmapping);
- found_anchor = true;
- break;
- }
- DEB2(" found " + string(entry.name) + " : ");
- DEB(entry.address.lineNumber);
- refentry.linenumber = entry.address.lineNumber;
- foundreflist.push_back(refentry);
- } while (tagsFindNext (ctags_file, &entry) == TagSuccess);
- }
-
- if (found) {
- if (! found_anchor) {
- ref_substitutionmapping["$text"] = preformatter->preformat(s);
- TextStyle *referencestyle = 0;
- if ((foundreflist.size()>1 && refposition == INLINE) || refposition == POSTLINE)
- referencestyle = &(refstyle.postline_reference);
- else if (refposition == POSTDOC)
- referencestyle = &(refstyle.postdoc_reference);
- else
- referencestyle = &(refstyle.inline_reference);
-
- for (FoundRefList::const_iterator it = foundreflist.begin(); it != foundreflist.end(); ++it) {
- ostringstream gen_info;
- // we found where this element appears so we generate a reference
- // if it's a link in the same file, we use the output_file_name...
- if (it->filename == fileinfo->filename || it->filename == fileinfo->input_file_name)
- gen_info << fileinfo->output_file_name;
- else
- gen_info << it->filename << fileinfo->output_file_extension;
- // ...otherwise we build the referenced file by using the output_file_extension
- // in fact, in this case, it probably means that multiple input files have been specified
-
- ref_substitutionmapping["$outfile"] = gen_info.str();
- ref_substitutionmapping["$infilename"] = strip_file_path(it->filename);
- ref_substitutionmapping["$infile"] = it->filename;
-
- gen_info.str("");
- gen_info << it->linenumber;
- ref_substitutionmapping["$linenum"] = gen_info.str();
- output += referencestyle->output(ref_substitutionmapping);
-
- // if the following is true, it means that there more than one reference
- if (foundreflist.size() > 1 || refposition != INLINE) {
- output += preformatter->preformat("\n");
-
- if (refposition == POSTLINE || refposition == INLINE) {
- outputbuffer->output_postline(output);
- } else { // (refposition == POSTDOC)
- outputbuffer->output_post(output);
- }
-
- output = ""; // no need to modify the current element
- }
- }
- }
- }
-
- return output;
-}
diff --git a/src/lib/refgeneratormap.h b/src/lib/refgeneratormap.h
deleted file mode 100644
index 3db34dc..0000000
--- a/src/lib/refgeneratormap.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//
-// C++ Interface: refgeneratormap
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2005
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#ifndef REFGENERATORMAP_H
-#define REFGENERATORMAP_H
-
-#include <generatormap.h>
-
-#include "my_sstream.h"
-#include "readtags.h"
-#include "textstyles.h"
-
-typedef enum {INLINE=1, POSTLINE, POSTDOC} RefPosition;
-
-/**
-A specialized GeneratorMap that also generates anchors and references, by using ctags information
-
-@author Lorenzo Bettini
-*/
-class RefGeneratorMap : public GeneratorMap
-{
-private:
- const std::string ctags_file_name;
- TextStyles::RefTextStyle refstyle;
- RefPosition refposition;
- tagFile *ctags_file;
- tagFileInfo info;
- std::ostringstream buffer;
-
-public:
- RefGeneratorMap(PreFormatter *pf, const std::string &_ctags_file_name,
- const TextStyles::RefTextStyle &r, RefPosition pos);
-
- ~RefGeneratorMap();
-
-protected:
- virtual const std::string generateString(const std::string& elem, const std::string& s, const FileInfo* arg1);
- const std::string _generateString(const std::string& elem, const std::string& s, const FileInfo* arg1);
-};
-
-#endif
diff --git a/src/lib/refposition.h b/src/lib/refposition.h
new file mode 100644
index 0000000..f80c9e0
--- /dev/null
+++ b/src/lib/refposition.h
@@ -0,0 +1,8 @@
+#ifndef REFPOSITION_H
+#define REFPOSITION_H
+
+// Where a generated reference must be put.
+// NOTE(review): presumably INLINE = next to the referenced element,
+// POSTLINE = after the current line, POSTDOC = after the whole document;
+// confirm against the code that consumes RefPosition.
+
+typedef enum {INLINE=1, POSTLINE, POSTDOC} RefPosition;
+
+#endif // REFPOSITION_H
diff --git a/src/lib/regexpengine.cpp b/src/lib/regexpengine.cpp
new file mode 100644
index 0000000..37d377d
--- /dev/null
+++ b/src/lib/regexpengine.cpp
@@ -0,0 +1,265 @@
+//
+// C++ Implementation: regexpengine
+//
+// Description:
+//
+//
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+#include "regexpengine.h"
+
+/// Empty destructor: nothing is released explicitly here.
+RegExpEngine::~RegExpEngine() {
+}
+
+#include <fstream>
+#include <iostream>
+#include <stdlib.h>
+
+#include "textformatter.h"
+#include "keys.h"
+#include "langdefloader.h"
+#include "messages.h"
+#include "parserinfo.h"
+
+// purpose:
+// takes the contents of a file and transforms it into
+// syntax highlighted code in html format
+
+using namespace std;
+
+/// outcome of load_line: nothing could be read (FOUND_EOF), a line
+/// terminated by '\n' was read (FOUND_NL), or the input ended before a
+/// '\n' was seen (FOUND_END)
+typedef enum {FOUND_EOF=0, FOUND_NL, FOUND_END} load_line_ret;
+
+/**
+ * Reads one line from the stream into s.
+ *
+ * s is always cleared first. Characters are appended up to (not including)
+ * the next '\n'; every '\r' is dropped.
+ *
+ * @param s receives the line contents, without '\n' and without '\r'
+ * @param is the stream to read from
+ * @return FOUND_EOF if the stream is already unusable (failed, bad or at
+ * end of file) when called, FOUND_NL if a '\n' terminated the line,
+ * FOUND_END if reading stopped without meeting a '\n'
+ */
+load_line_ret load_line(std::string& s, std::istream& is) {
+ s.erase();
+ // "!is" covers badbit and failbit: a stream that failed without reaching
+ // end of file would otherwise yield FOUND_END forever, and callers that
+ // loop until FOUND_EOF would never terminate
+ if (!is || is.eof())
+ return FOUND_EOF;
+
+ char c;
+ while (is.get(c)) {
+ if (c == '\n')
+ return FOUND_NL;
+ if (c != '\r')
+ s.append(1, c);
+ }
+
+ return FOUND_END;
+}
+
+/**
+ * Processes an input line by line: each line is matched against the regular
+ * expression of the current state, and the matching and non-matching parts
+ * are passed to the formatter.
+ *
+ * fileinfo->line is set to 1 at the beginning and incremented after each
+ * line. The state that was current on entry is restored before returning.
+ * If the file cannot be opened an error is written to cerr and the process
+ * exits with status 1.
+ *
+ * @param file the name of the file to read, or a null pointer to read from
+ * standard input
+ */
+void RegExpEngine::process_file(const char *file) {
+ istream *is = 0;
+
+ if (file) {
+ is = new ifstream(file);
+ if (!is || ! (*is)) {
+ cerr << "Error in opening " << file<< " for input" << endl;
+ exit(1);
+ }
+ } else
+ is = &cin;
+
+ std::string s;
+
+ std::string::const_iterator start, end;
+ boost::smatch match;
+ boost::smatch what;
+ boost::match_flag_type flags;
+
+ // the regexp state we try at the moment.
+ RegExpStatePtr alternative;
+
+ // the regexp state matching best;
+ RegExpStatePtr best_alternative;
+
+ // remembered so that it can be restored at the end (and by exitAll)
+ initial_state = currentstate;
+
+ fileinfo->line = 1;
+
+ // for selecting the formatter
+ int index_of_formatter = 0;
+ // for selecting the subexpression (or the whole expression)
+ int index_of_subexpression = 0;
+
+ // -1 means "no candidate found yet" for both of these
+ int smallest_prefix = -1;
+ int biggest_length = -1;
+
+ string prefix;
+
+ load_line_ret ret;
+ while ((ret = load_line(s, *is)) != FOUND_EOF) {
+ bool matched = true;
+ bool found_eol = false;
+ start = s.begin();
+ end = s.end();
+ // reset the flags
+ flags = boost::match_default;
+
+ // always start with the current state
+ alternative = currentstate;
+
+ while (matched) {
+ matched = false;
+
+ if (alternative->has_alternative()) {
+ // this means that the state contains a list of alternative states
+ // so we must try to match all the states and use the one that matches best
+ // i.e., with the smallest prefix and the biggest match length
+ smallest_prefix = -1;
+ biggest_length = -1;
+ while (alternative.get()) {
+ if (boost::regex_search(start, end, match,
+ alternative->reg_exp, flags)) {
+ const std::string &match_prefix = match.prefix();
+ if (smallest_prefix < 0 || (boost::smatch::size_type)smallest_prefix >= match_prefix.size()) {
+ if ((boost::smatch::size_type)smallest_prefix > match_prefix.size()|| biggest_length < 0 || (boost::smatch::difference_type)biggest_length < match.length()) {
+ prefix = match.prefix();
+ smallest_prefix = match_prefix.size();
+ biggest_length = match.length();
+ best_alternative = alternative;
+ matched = true;
+ // copy it, otherwise the next call will overwrite it
+ what = match;
+ }
+ }
+ }
+ alternative = alternative->alternative;
+ }
+
+ if (matched) {
+ // store the one that matched best
+ alternative = best_alternative;
+ } else {
+ // reset the original current_state
+ alternative = currentstate;
+ }
+ } else if (boost::regex_search(start, end, what,
+ alternative->reg_exp, flags)) {
+ // otherwise, all the alternatives of the state are stored as
+ // a big alternation, where all alternatives are grouped
+ prefix = what.prefix();
+ matched = true;
+ }
+
+ if (matched) {
+ if (alternative->hasMarkedAlternatives) {
+ // we must inspect all the sub_matches
+ // to find the subexpression that matched
+ for (unsigned int i = 1; i < what.size(); ++i) {
+ if (what[i].matched) {
+ index_of_formatter = i;
+ index_of_subexpression = i;
+ break; // no other match is possible
+ }
+ }
+ } else {
+ // the alternative state contains only one expression, with
+ // marked subexpressions, so we must format the whole match
+
+ // the formatter at 0 is the normal formatter
+ index_of_formatter = 1;
+ // we select the whole match
+ index_of_subexpression = 0;
+
+ // this is OK also in the case when allAlternativesCanMatch:
+ // we consider the whole expression, and all formatters share the
+ // same exit state, so we can use the first one
+ }
+
+ // part that possibly did not match
+ if (prefix.size())
+ format(-1, alternative, prefix);
+
+ if (alternative->allAlternativesCanMatch) {
+ // we must format each subexpression that matched
+ for (unsigned int i = 1; i < what.size(); ++i) {
+ if (what[i].matched) {
+ format(i, alternative, what[i]);
+ }
+ }
+
+ // only the last formatter has the correct next state
+ index_of_formatter = what.size() - 1;
+ } else {
+ // format the part that matched
+ format(index_of_formatter, alternative,
+ what[index_of_subexpression]);
+ }
+
+ if (alternative->formatters[index_of_formatter]->getNextState()) {
+ // we must enter another state
+ enterState(alternative, index_of_formatter);
+ } else if (alternative->formatters[index_of_formatter]->exit_state_level) {
+ if (alternative->formatters[index_of_formatter]->exit_all) {
+ // we must go back to the outermost state
+ exitAll();
+ } else {
+ // we must get back to exit_state_level states
+ exitState(alternative->formatters[index_of_formatter]->exit_state_level);
+ }
+ }
+
+ // now let's continue with what's left of the original input
+ start = what[index_of_subexpression].second;
+ // compare against end instead of dereferencing start: start may be
+ // the end iterator (which must not be dereferenced), and a NUL byte
+ // inside the line must not be mistaken for the end of the line
+ if (start == end) {
+ if (found_eol)
+ matched = false; // we had already matched end of line
+
+ // we might need to match the eol itself, so let's perform another loop
+ found_eol = true;
+ }
+
+ if (what[0].first != what[0].second) {
+ // we actually consumed something, so the start of the string
+ // must not be interpreted as the beginning of the line
+ flags |= boost::match_not_bol;
+ }
+
+ // we always search for the next match by using the original current state
+ alternative = currentstate;
+ } else {
+ // format the non-matching part as normal
+ format(-1, alternative, string(start, end));
+ matched = false;
+ }
+ }
+
+ // the newline is emitted only if the input line actually had one
+ if (ret == FOUND_NL)
+ formatter->format_nl("\n");
+
+ (fileinfo->line)++;
+ }
+
+ // make sure we flush all the buffered parts
+ formatter->flush();
+
+ // the stream was allocated here only when a file name was given
+ if (file)
+ delete is;
+
+ currentstate = initial_state; // reset the initial state
+}
+
+/**
+ * Enters the next state of the formatter at position index of the given
+ * state; the state that was current so far is saved on the stack.
+ */
+void RegExpEngine::enterState(RegExpStatePtr state, int index) {
+ RegExpStatePtr target = state->formatters[index]->getNextState();
+ states_stack.push(currentstate);
+ currentstate = target;
+}
+
+/**
+ * Goes back level states: level - 1 saved states are discarded and the
+ * next saved one becomes the current state.
+ *
+ * If level is larger than the number of saved states, the oldest saved
+ * state becomes the current one; if no state was saved at all, the current
+ * state is left unchanged (calling pop() or top() on an empty stack is
+ * undefined behaviour).
+ *
+ * @param level how many states must be exited
+ */
+void RegExpEngine::exitState(int level) {
+ // remove additional levels, always keeping the last saved state
+ for (int l = 1; l < level && states_stack.size() > 1; ++l)
+ states_stack.pop();
+
+ // nothing was saved: we already are in the outermost state
+ if (states_stack.empty())
+ return;
+
+ currentstate = states_stack.top();
+ states_stack.pop();
+}
+
+/// Drops every saved state and makes the initial state the current one.
+void RegExpEngine::exitAll() {
+ // assigning an empty stack discards everything that was saved
+ states_stack = stack_of_states();
+ currentstate = initial_state;
+}
+
+/**
+ * Passes the string s to the formatter, together with the element that
+ * state->get_elem(index) yields and the current file information.
+ * process_file uses index -1 for the parts that did not match.
+ */
+void RegExpEngine::format(int index, RegExpStatePtr state, const std::string &s) {
+ formatter->format(state->get_elem(index), s, fileinfo);
+}
+
diff --git a/src/regexpengine.h b/src/lib/regexpengine.h
index b2e9976..aefa716 100644
--- a/src/regexpengine.h
+++ b/src/lib/regexpengine.h
@@ -22,35 +22,34 @@ class TextFormatter;
#include "fileinfo.h"
/**
-the class that actually performs regular expression processing
+ the class that actually performs regular expression processing
-@author Lorenzo Bettini
-*/
-class RegExpEngine
-{
- protected:
+ @author Lorenzo Bettini
+ */
+class RegExpEngine {
+protected:
RegExpStatePtr currentstate, initial_state;
FileInfo *fileinfo;
- private:
+private:
TextFormatter *formatter;
typedef std::stack<RegExpStatePtr> stack_of_states;
stack_of_states states_stack;
- protected:
- virtual void enterState(int index);
+protected:
+ virtual void enterState(RegExpStatePtr state, int index);
virtual void exitState(int level);
virtual void exitAll();
- virtual void format(int index, const std::string &s);
+ virtual void format(int index, RegExpStatePtr state, const std::string &s);
public:
- RegExpEngine(RegExpStatePtr v, TextFormatter *pre, FileInfo *f) :
- currentstate(v), fileinfo(f), formatter(pre)
- {}
+ RegExpEngine(RegExpStatePtr v, TextFormatter *pre, FileInfo *f) :
+ currentstate(v), fileinfo(f), formatter(pre) {
+ }
- virtual ~RegExpEngine();
+ virtual ~RegExpEngine();
- void process_file(const char *file);
+ void process_file(const char *file);
};
typedef boost::shared_ptr<RegExpEngine> RegExpEnginePtr;
diff --git a/src/regexpenginedebug.cpp b/src/lib/regexpenginedebug.cpp
index 886af8d..7e8ad78 100644
--- a/src/regexpenginedebug.cpp
+++ b/src/lib/regexpenginedebug.cpp
@@ -25,11 +25,22 @@ RegExpEngineDebug::~RegExpEngineDebug()
{
}
+/**
+ * Prints the regular expression of the passed state and then, each on a new
+ * line, the regular expressions of the states in its chain of alternatives.
+ */
+void printRegExpState(RegExpStatePtr state)
+{
+  cout << state->reg_exp;
+
+  for (RegExpStatePtr next = state->alternative; next.get(); next = next->alternative)
+    cout << "\n" << " " << next->reg_exp;
+}
-void RegExpEngineDebug::enterState(int index)
+void RegExpEngineDebug::enterState(RegExpStatePtr state, int index)
{
- cout << "entering: " << currentstate->formatters[index]->getNextState()->reg_exp << endl;
- RegExpEngine::enterState(index);
+ cout << "entering: ";
+ printRegExpState(state->formatters[index]->getNextState());
+ cout << endl;
+ RegExpEngine::enterState(state, index);
//step();
}
@@ -44,27 +55,32 @@ void RegExpEngineDebug::exitState(int level)
{
RegExpEngine::exitState(level);
- cout << "exiting " << level << " level(s): " << currentstate->reg_exp << endl;
+ cout << "exiting " << level << " level(s): ";
+ printRegExpState(currentstate);
+ cout << endl;
//step();
}
void printInfo(const SubExpressionInfo &e)
{
+ if (!e.second.line)
+ return; // it concerns a subexpressions
+
cout << e.second.filename << ":" << e.second.line << ": "
<< e.first << endl;
}
-void RegExpEngineDebug::format(int index, const std::string& s)
+void RegExpEngineDebug::format(int index, RegExpStatePtr state, const std::string& s)
{
- RegExpEngine::format(index, s);
+ RegExpEngine::format(index, state, s);
if(index >= 0) {
unsigned int i = (unsigned int)index;
- assert(i <= currentstate->subExpressions.size());
+ assert(i <= state->subExpressions.size());
- printInfo(currentstate->subExpressions[i-1]);
+ printInfo(state->subExpressions[i-1]);
}
- cout << "formatting: \"" << s << "\" as " << currentstate->get_elem(index) << endl;
+ cout << "formatting: \"" << s << "\" as " << state->get_elem(index) << endl;
step();
}
diff --git a/src/regexpenginedebug.h b/src/lib/regexpenginedebug.h
index d14a5bd..b673d4f 100644
--- a/src/regexpenginedebug.h
+++ b/src/lib/regexpenginedebug.h
@@ -32,10 +32,10 @@ protected:
/// whether it's an interactive debug session
bool interactive;
- virtual void enterState(int index);
+ virtual void enterState(RegExpStatePtr state, int index);
virtual void exitAll();
virtual void exitState(int level);
- virtual void format(int index, const std::string& s);
+ virtual void format(int index, RegExpStatePtr state, const std::string& s);
/**
* Waits for a step command (if in interactive mode)
diff --git a/src/lib/regexpreprocessor.cpp b/src/lib/regexpreprocessor.cpp
index f0f7e5f..516c2e9 100644
--- a/src/lib/regexpreprocessor.cpp
+++ b/src/lib/regexpreprocessor.cpp
@@ -1,10 +1,10 @@
//
-// C++ Implementation: %{MODULE}
+// C++ Implementation: RegexPreProcessor
//
-// Description:
+// Description: performs operations or inspections on a string representing
+// a valid regular expression
//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -16,71 +16,189 @@
using namespace std;
+// IMPORTANT: the following regular expressions assume that the
+// regular expression they try to match is a valid regular expression
+
+// matches character sets in a regular expression
const boost::regex char_set_exp("\\[([^\\|]*)\\]");
-RegexPreProcessor::RegexPreProcessor()
-{
-}
+// substitute a "(" with "(?:" if it's not followed by a ? and not preceded by \\ char
+const boost::regex from("(\\\\\\()|(\\((?!\\?))");
+const string into = "(?1\\\\\\()(?2\\(\\?\\:)";
+// matches an actual marking parenthesis, i.e., not preceded by \\ and not followed by ?
+const boost::regex paren("(?<!\\\\)\\((?!\\?)");
-RegexPreProcessor::~RegexPreProcessor()
-{
-}
+// regular expression matching a backreference, e.g., \1 or inside a conditional (?(1)...)
+const boost::regex backreference("(\\\\[[:digit:]])|(\\(\\?\\([[:digit:]])");
const string
-RegexPreProcessor::preprocess(const string &s)
-{
- // substitute a "(" with "(?:" if it's not followed by a ? and not preceeded by \\ char
- boost::regex from("(\\\\\\()|(\\((?!\\?))");
- string into = "(?1\\\\\\()(?2\\(\\?\\:)";
+ subexpressions_info::ERR_OUTER_UNMARKED = "unmarked subexpressions are allowed only inside marked subexpressions";
+const string
+ subexpressions_info::ERR_NESTED_SUBEXP = "subexpressions of subexpressions are not allowed";
+const string subexpressions_info::ERR_UNBALANCED_PAREN = "unbalanced parenthesis";
+const string
+ subexpressions_info::ERR_OUTSIDE_SUBEXP = "parts outside marked subexpressions are not allowed";
- return boost::regex_replace(s, from, into, boost::match_default | boost::format_all);
+RegexPreProcessor::RegexPreProcessor() {
}
-const string
-_make_nonsensitive(const string &s)
-{
- ostringstream result;
+// nothing to release: the class only offers static operations
+RegexPreProcessor::~RegexPreProcessor() {
+}
+
+/**
+ * Turns every marking parenthesis of s into a non marking one.
+ *
+ * @param s the regular expression string
+ * @return a copy of s with the substitution applied
+ */
+const string RegexPreProcessor::preprocess(const string &s) {
+  // "from" matches either an escaped "(" (which is kept as it is) or a "("
+  // that is not followed by "?" (which is turned into "(?:")
+  const boost::match_flag_type replace_flags =
+    boost::match_default | boost::format_all;
+
+  return boost::regex_replace(s, from, into, replace_flags);
+}
+
+const string _make_nonsensitive(const string &s) {
+ ostringstream result;
- for (string::const_iterator it = s.begin(); it != s.end(); ++it)
- if (isalpha(*it))
- result << "[" << (char)toupper(*it) << (char)tolower(*it) << "]";
- else
- result << *it;
+ for (string::const_iterator it = s.begin(); it != s.end(); ++it)
+ if (isalpha(*it))
+ result << "[" << (char)toupper(*it) << (char)tolower(*it) << "]";
+ else
+ result << *it;
- return result.str();
+ return result.str();
}
-const string
-RegexPreProcessor::make_nonsensitive(const string &s)
-{
- boost::sregex_iterator m1(s.begin(), s.end(), char_set_exp);
- boost::sregex_iterator m2;
+const string RegexPreProcessor::make_nonsensitive(const string &s) {
+ boost::sregex_iterator m1(s.begin(), s.end(), char_set_exp);
+ boost::sregex_iterator m2;
- if (m1 == m2)
- return _make_nonsensitive(s);
+ if (m1 == m2)
+ return _make_nonsensitive(s);
- ostringstream buffer;
- string prefix;
- string suffix;
+ ostringstream buffer;
+ string prefix;
+ string suffix;
- for (boost::sregex_iterator it = m1; it != m2; ++it)
- {
- prefix = it->prefix();
- suffix = it->suffix();
+ for (boost::sregex_iterator it = m1; it != m2; ++it) {
+ prefix = it->prefix();
+ suffix = it->suffix();
- if (prefix.size()) {
- buffer << _make_nonsensitive(prefix);
+ if (prefix.size()) {
+ buffer << _make_nonsensitive(prefix);
+ }
+
+ buffer << (*it)[0];
}
- buffer << (*it)[0];
- }
+ if (suffix.size()) {
+ buffer << _make_nonsensitive(suffix);
+ }
- if (suffix.size()) {
- buffer << _make_nonsensitive(suffix);
- }
+ return buffer.str();
+}
- return buffer.str();
+/**
+ * Counts the marking parentheses of s, i.e., the matches of the "paren"
+ * expression.
+ *
+ * @param s the regular expression string
+ * @return the number of marking parentheses found
+ */
+unsigned int RegexPreProcessor::num_of_subexpressions(const string &s)
+{
+  const boost::sregex_iterator finish;
+  int found = 0;
+
+  // every hit of the expression is one marking parenthesis
+  boost::sregex_iterator hit(s.begin(), s.end(), paren);
+  while (hit != finish) {
+    ++found;
+    ++hit;
+  }
+
+  return found;
}
+/**
+ * Splits s into its marked subexpressions.
+ *
+ * No real parsing is performed: s is assumed to be made up only of marked
+ * subexpressions, without nested subexpressions and without chars outside
+ * of them, so it is enough to cut s at each marking parenthesis.
+ *
+ * @param s the regular expression string
+ * @return the pieces, each starting with "(", in a list allocated with new
+ * (the caller must delete it)
+ */
+const subexpressions_strings *RegexPreProcessor::split_marked_subexpressions(const string &s) {
+  boost::sregex_iterator current(s.begin(), s.end(), paren);
+  const boost::sregex_iterator last;
+
+  subexpressions_strings *pieces = new subexpressions_strings;
+
+  while (current != last) {
+    // what precedes this parenthesis is the body of the previous piece
+    const string before = current->prefix();
+    if (!before.empty())
+      pieces->push_back("(" + before);
+
+    // what follows the last parenthesis is the body of the last piece;
+    // it must be read before moving the iterator forward
+    const string after = current->suffix();
+    ++current;
+    if (current == last)
+      pieces->push_back("(" + after);
+  }
+
+  return pieces;
+}
+
+/**
+ * Checks that s consists only of marked subexpressions "(...)", without
+ * nested marked subexpressions and without chars outside of them, and counts
+ * the marked subexpressions.
+ *
+ * A backslash escapes the char that follows it: an escaped parenthesis is
+ * a literal char and is skipped, while an escaped backslash followed by
+ * a parenthesis leaves that parenthesis active.
+ *
+ * NOTE(review): parentheses inside a character class (e.g., "[(]") are not
+ * recognized as literal chars by this scan.
+ *
+ * @param s the regular expression string
+ * @return the number of marked subexpressions; errors is set to one of the
+ * ERR_ constants when the check fails (marked then only counts the
+ * subexpressions seen up to the error)
+ */
+subexpressions_info RegexPreProcessor::num_of_marked_subexpressions(const string &s) {
+  subexpressions_info sexps;
+
+  // number of open parentheses (marked or not)
+  int open_paren_num = 0;
+  // whether we're inside a marked subexpression
+  bool found_marked_subexp = false;
+  // len of string
+  const int len = s.size();
+
+  for (int i = 0; i < len; ++i) {
+    // char we're examining
+    const char c = s[i];
+
+    if (c == '\\' && (i+1) < len) {
+      // the backslash and the char it escapes are consumed together, so
+      // that the second backslash of an escaped backslash is never taken
+      // for the escape of a following parenthesis
+      const char escaped = s[i+1];
+      ++i;
+
+      if (escaped == '(' || escaped == ')')
+        continue; // skip the escaped paren
+
+      // any other escape is an ordinary part of the expression
+      if (!found_marked_subexp) {
+        sexps.errors = subexpressions_info::ERR_OUTSIDE_SUBEXP;
+        return sexps;
+      }
+    } else if (c == '(') {
+      // we found a subexp
+      ++open_paren_num;
+
+      if ((i+1) < len && s[i+1] == '?') {
+        if (!found_marked_subexp) {
+          // outer subexpressions must be marked
+          sexps.errors = subexpressions_info::ERR_OUTER_UNMARKED;
+          return sexps;
+        }
+      } else {
+        // it's a marked subexp
+        if (found_marked_subexp) {
+          // we don't allow nested subexpressions
+          sexps.errors = subexpressions_info::ERR_NESTED_SUBEXP;
+          return sexps;
+        }
+
+        // we found a marked subexp
+        found_marked_subexp = true;
+        ++(sexps.marked);
+      }
+    } else if (c == ')') {
+      if (!open_paren_num) {
+        // unbalanced parenthesis
+        sexps.errors = subexpressions_info::ERR_UNBALANCED_PAREN;
+        return sexps;
+      }
+
+      --open_paren_num;
+
+      // end of marked subexp
+      if (!open_paren_num && found_marked_subexp)
+        found_marked_subexp = false;
+    } else {
+      // we don't allow non marked parts
+      if (!found_marked_subexp) {
+        sexps.errors = subexpressions_info::ERR_OUTSIDE_SUBEXP;
+        return sexps;
+      }
+    }
+  }
+
+  // check that all paren are closed
+  if (open_paren_num)
+    sexps.errors = subexpressions_info::ERR_UNBALANCED_PAREN;
+
+  return sexps;
+}
+
+/**
+ * Tells whether the "backreference" expression matches somewhere in s.
+ *
+ * @param s the regular expression string
+ * @return true if a match was found
+ */
+bool RegexPreProcessor::contains_backreferences(const std::string &s) {
+  const bool found = boost::regex_search(s, backreference);
+  return found;
+}
diff --git a/src/lib/regexpreprocessor.h b/src/lib/regexpreprocessor.h
index 0842c6a..8625023 100644
--- a/src/lib/regexpreprocessor.h
+++ b/src/lib/regexpreprocessor.h
@@ -1,10 +1,10 @@
//
-// C++ Interface: %{MODULE}
+// C++ Interface: RegexPreProcessor
//
-// Description:
+// Description: performs operations or inspections on a string representing
+// a valid regular expression
//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -13,20 +13,90 @@
#define REGEXPREPROCESSOR_H
#include <string>
+#include <list>
+
+/**
+ * info about subexpressions: the result of checking a regular expression
+ * string with RegexPreProcessor::num_of_marked_subexpressions
+ */
+struct subexpressions_info {
+ /// errors for subexpression checking
+ /// an unmarked subexpression (? ...) was found outside a marked one
+ const static std::string ERR_OUTER_UNMARKED;
+ /// a marked subexpression was found inside another marked one
+ const static std::string ERR_NESTED_SUBEXP;
+ /// the parentheses are not balanced
+ const static std::string ERR_UNBALANCED_PAREN;
+ /// some part of the expression is outside marked subexpressions
+ const static std::string ERR_OUTSIDE_SUBEXP;
+
+ /// num of marked subexpressions
+ unsigned int marked;
+ /// error specifications, if any (empty when no error was found)
+ std::string errors;
+
+ /// starts with no marked subexpressions and no errors
+ subexpressions_info() : marked(0) {}
+};
+
+/**
+ * all the marked subexpressions in a list
+ */
+typedef std::list<std::string> subexpressions_strings;
/**
-preprocess a regular expression, e.g., transform "()" into "(?:)"
+ preprocess a regular expression, e.g., transform "()" into "(?:)"
-@author Lorenzo Bettini
-*/
-class RegexPreProcessor{
+ @author Lorenzo Bettini
+ */
+class RegexPreProcessor {
public:
RegexPreProcessor();
~RegexPreProcessor();
+ /**
+ * translates marked subexpressions (...) into non marked subexpressions (?: )
+ * @return the translated string
+ */
static const std::string preprocess(const std::string &s);
+
+ /**
+ * translates the expression into a case nonsensitive expression, i.e.,
+ * foo is translated into [Ff][Oo][Oo]
+ * @return the translated string
+ */
static const std::string make_nonsensitive(const std::string &s);
+
+ /**
+ * counts the number of marked subexpressions (...)
+ * @return the number of marked subexpressions
+ */
+ static unsigned int num_of_subexpressions(const std::string &s);
+
+ /**
+ * checks that the expression is made up of marked subexpressions (...)
+ * and no nested subexpressions and no char outside subexpressions
+ *
+ * @return the struct containing the number of marked subexpressions
+ * and possible errors
+ */
+ static subexpressions_info num_of_marked_subexpressions(const std::string &s);
+
+ /**
+ * Splits the marked subexpressions of a regular expression made up of only
+ * marked subexpressions and no nested subexpressions and char outside subexpressions
+ * (thus, before calling this, you must make sure that num_of_marked_subexpressions
+ * did not return an error).
+ *
+ * @return the subexpressions in a collection (this is allocated on the heap, so
+ * it is up to the caller to delete it)
+ */
+ static const subexpressions_strings *split_marked_subexpressions(const std::string &s);
+
+ /**
+ * Checks whether the passed regular expression string contains
+ * a backreference (e.g., either \1 or a conditional with a backreference,
+ * (?(1)...))
+ *
+ * @return true if the passed regular expression string contains
+ * a backreference
+ */
+ static bool contains_backreferences(const std::string &s);
};
#endif
diff --git a/src/lib/regexpstate.cpp b/src/lib/regexpstate.cpp
new file mode 100644
index 0000000..af308ef
--- /dev/null
+++ b/src/lib/regexpstate.cpp
@@ -0,0 +1,207 @@
+//
+// C++ Implementation: RegExpState
+//
+// Description: a regular expression state: contains the regular expression to
+// match and the formatters for each alternative.
+//
+//
+// Author: Lorenzo Bettini, http://www.lorenzobettini.it, (C) 1999-2007
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+#include "regexpstate.h"
+
+#include "messages.h"
+
+#include <stdlib.h>
+
+using namespace std;
+
+int RegExpState::global_id = 1;
+
+/**
+ * Builds a formatter for the element el.
+ * The state r is only recorded in the weak reference next_state;
+ * next_state_strong stays empty until setNextState is called.
+ *
+ * @param el the element represented
+ * @param r the next state
+ * @param exit how many states must be left
+ * @param all the value for exit_all
+ */
+RegExpFormatter::RegExpFormatter(const string &el, RegExpStatePtr r, int exit,
+ bool all) :
+ elem(el), exit_state_level(exit), exit_all(all), next_state(r) {
+}
+
+/**
+ * Records r as the next state through a strong reference.
+ * The weak reference set by the constructor is left untouched, and
+ * getNextState still prefers it as long as it refers to a live state.
+ */
+void RegExpFormatter::setNextState(RegExpStatePtr r) {
+ next_state_strong = r;
+}
+
+/**
+ * @return the state referred to by the weak reference, if it is still
+ * alive, otherwise the one held by the strong reference (possibly empty)
+ */
+RegExpStatePtr RegExpFormatter::getNextState() const {
+  RegExpStatePtr alive = next_state.lock();
+  if (alive)
+    return alive;
+
+  return next_state_strong;
+}
+
+/**
+ * Returns the element name stored in the formatter at the passed index.
+ * A negative index selects the default formatter (position 0).
+ * @param index the position of the formatter, or a negative value
+ * @return the elem of the selected formatter
+ */
+const string &RegExpState::get_elem(int index) {
+  const int position = (index < 0) ? 0 : index;
+  return formatters[position]->elem;
+}
+
+/**
+ * Buffers the expression s as a further "|" alternative of this state,
+ * together with its formatter and its parser info.
+ *
+ * If this state has an alternative state the request is forwarded to it;
+ * if a previous operation asked for it, the expression is put into a newly
+ * created alternative state instead.
+ *
+ * @param s the regular expression string
+ * @param parserInfo where the expression was defined
+ * @param f the formatter for the expression
+ */
+void RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo,
+    RegExpFormatterPtr f) {
+  // an existing alternative takes over every further addition
+  if (alternative.get()) {
+    alternative->add_exp(s, parserInfo, f);
+    return;
+  }
+
+  if (nextAddMustCreateAnAlternative) {
+    // a previous operation recorded that this addition must be performed
+    // as the creation of an alternative
+    add_alternative(s, parserInfo, f);
+
+    // but further additions on that alternative must not create others
+    alternative->nextAddMustCreateAnAlternative = false;
+    return;
+  }
+
+  // separate the new expression from the ones already buffered
+  if (!buffer.str().empty())
+    buffer << "|";
+  buffer << s;
+
+  formatters.push_back(f);
+  subExpressions.push_back(SubExpressionInfo(s, *parserInfo));
+}
+
+/**
+ * Buffers the expression s, whose marked subexpressions correspond, in
+ * order, to the formatters of f.
+ *
+ * The expression is kept isolated in a state of its own: this state if
+ * nothing has been buffered yet, a newly created alternative otherwise.
+ * If an alternative already exists the request is forwarded to it.
+ *
+ * @param s the regular expression string
+ * @param parserInfo where the expression was defined
+ * @param f the formatters, one for each marked subexpression
+ */
+void RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo,
+    const format_vector &f) {
+  if (alternative.get()) {
+    alternative->add_exp(s, parserInfo, f);
+    return;
+  }
+
+  RegExpState *myalternative;
+
+  if (buffer.str().empty()) {
+    // we must not add an alternative: this state is the alternative
+    myalternative = this;
+  } else {
+    // we must isolate this regexp in an alternative
+    alternative = RegExpStatePtr(new RegExpState);
+
+    myalternative = alternative.get();
+  }
+
+  // the default formatter could be already set, and so we must
+  // transmit it right now, otherwise the alternative will never have it.
+  if (formatters[0].get()) {
+    myalternative->set_default_formatter(formatters[0]);
+  }
+
+  // another add exp on the alternative must create another alternative
+  myalternative->nextAddMustCreateAnAlternative = true;
+
+  myalternative->buffer << s;
+
+  // position 0 (the default formatter) always exists, since the constructor
+  // creates formatters with one element: the new ones simply follow it
+  std::copy(f.begin(), f.end(), back_inserter(myalternative->formatters));
+
+  // add a subexpression for each formatter (just to be consistent with
+  // other cases).
+  for (format_vector::const_iterator it = f.begin(); it != f.end(); ++it)
+    myalternative->subExpressions.push_back(make_pair(s, *parserInfo));
+}
+
+/**
+ * Adds the expression s so that it is kept apart from the expressions
+ * already buffered: in this state if nothing has been buffered yet, in
+ * a newly created alternative state otherwise. If an alternative already
+ * exists the request is forwarded to it.
+ *
+ * @param s the regular expression string
+ * @param parserInfo where the expression was defined
+ * @param f the formatter for the expression
+ */
+void RegExpState::add_alternative(const std::string &s, ParserInfo *parserInfo,
+    RegExpFormatterPtr f) {
+  if (alternative.get()) {
+    alternative->add_alternative(s, parserInfo, f);
+    return;
+  }
+
+  if (buffer.str().empty()) {
+    // nothing buffered yet: this state itself acts as the alternative...
+    add_exp(s, parserInfo, f);
+
+    // ...but the next addition must go into an alternative
+    nextAddMustCreateAnAlternative = true;
+    return;
+  }
+
+  alternative.reset(new RegExpState);
+
+  // hand over the default formatter now, if it is already set, otherwise
+  // the alternative would never receive it
+  if (formatters[0].get())
+    alternative->set_default_formatter(formatters[0]);
+
+  alternative->add_exp(s, parserInfo, f);
+
+  // a further addition on the alternative must create another alternative
+  alternative->nextAddMustCreateAnAlternative = true;
+}
+
+/**
+ * Sets hasMarkedAlternatives on the last state of the chain of
+ * alternatives (this state, if it has no alternative).
+ */
+void RegExpState::setHasMarkedAlternatives() {
+  RegExpState *last = this;
+  while (last->alternative.get())
+    last = last->alternative.get();
+
+  last->hasMarkedAlternatives = true;
+}
+
+/**
+ * Sets allAlternativesCanMatch on the last state of the chain of
+ * alternatives (this state, if it has no alternative).
+ */
+void RegExpState::setAllAlternativesCanMatch() {
+  RegExpState *last = this;
+  while (last->alternative.get())
+    last = last->alternative.get();
+
+  last->allAlternativesCanMatch = true;
+}
+
+/**
+ * Appends the formatter of a (marked) subexpression to the last state of
+ * the chain of alternatives (this state, if it has no alternative).
+ *
+ * @param f the formatter to add
+ */
+void RegExpState::add_subexp_formatter(RegExpFormatterPtr f) {
+  RegExpState *last = this;
+  while (last->alternative.get())
+    last = last->alternative.get();
+
+  last->formatters.push_back(f);
+
+  // FIXME: insert a bogus subexpression
+  // (it keeps subExpressions aligned with formatters)
+  last->subExpressions.push_back(SubExpressionInfo("", ParserInfo()));
+}
+
+/**
+ * Builds the regular expression of this state from the buffered strings,
+ * and then does the same for the alternative state, if any.
+ *
+ * A bad expression is reported with printError, together with the buffered
+ * string it comes from, and the exception is propagated to the caller.
+ *
+ * @throw boost::bad_expression if a buffered string is not a valid
+ * regular expression
+ */
+void RegExpState::freeze() throw(boost::bad_expression) {
+  const string &buffered = buffer.str();
+  try {
+    reg_exp.assign(buffered);
+  } catch (const boost::bad_expression &) {
+    printError("bad expression: " + buffered);
+    throw;
+  }
+
+  // the alternative is frozen outside the try block: it reports its own
+  // bad expression, which must not be reported again here together with
+  // the (valid) buffer of this state
+  if (alternative.get()) {
+    alternative->freeze();
+  }
+}
+
+/**
+ * Stores f as the default formatter (position 0) of this state and of all
+ * the states in its chain of alternatives.
+ *
+ * @param f the default formatter
+ */
+void RegExpState::set_default_formatter(RegExpFormatterPtr f) {
+  for (RegExpState *st = this; st; st = st->alternative.get())
+    st->formatters[0] = f;
+}
+
+/**
+ * @return the last formatter of the last state in the chain of
+ * alternatives (this state, if it has no alternative)
+ */
+RegExpFormatterPtr RegExpState::getLastFormatter() const {
+  const RegExpState *last = this;
+  while (last->alternative.get())
+    last = last->alternative.get();
+
+  return last->formatters.back();
+}
+
+/**
+ * @return whether an alternative state is set
+ */
+bool RegExpState::has_alternative() const {
+  return alternative.get() != 0;
+}
diff --git a/src/lib/regexpstate.h b/src/lib/regexpstate.h
new file mode 100644
index 0000000..00085f4
--- /dev/null
+++ b/src/lib/regexpstate.h
@@ -0,0 +1,191 @@
+//
+// C++ Interface: RegExpState
+//
+// Description: a regular expression state: contains the regular expression to
+// match and the formatters for each alternative.
+//
+//
+// Author: Lorenzo Bettini, http://www.lorenzobettini.it, (C) 1999-2007
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+#ifndef REGEXPSTATE_H
+#define REGEXPSTATE_H
+
+#include <boost/regex.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/weak_ptr.hpp>
+#include <deque>
+#include <vector>
+#include "my_sstream.h"
+#include "parserinfo.h"
+
+struct RegExpState;
+
+typedef boost::shared_ptr<RegExpState> RegExpStatePtr;
+typedef boost::weak_ptr<RegExpState> RegExpStatePtrW;
+
+/**
+ * Associates an element name to a matched expression, together with the
+ * state transition to perform after the match: the next state to enter,
+ * or how many states to leave.
+ */
+struct RegExpFormatter
+{
+ const std::string elem; // the element represented
+ int exit_state_level; // how many states we must leave
+ bool exit_all; // whether we must go back to the outermost state
+
+ /// the state r is recorded through the weak reference only
+ RegExpFormatter(const std::string &el, RegExpStatePtr r = RegExpStatePtr(), int exit = 0, bool all = false);
+
+ /// records the next state through the strong reference
+ void setNextState(RegExpStatePtr r);
+ /// @return the state of the weak reference if it is still alive,
+ /// otherwise the one of the strong reference
+ RegExpStatePtr getNextState() const;
+
+ private:
+ RegExpStatePtrW next_state; // set by the constructor
+ RegExpStatePtr next_state_strong; // set by setNextState
+ /*
+ FIXME
+ the next_state is a weak pointer when there's a "nested" situation.
+ This allows to avoid cycles, that otherwise would prevent memory from
+ being correctly freed.
+ */
+};
+
+typedef boost::shared_ptr<RegExpFormatter> RegExpFormatterPtr;
+typedef std::deque<RegExpFormatterPtr> format_vector;
+typedef std::pair<std::string, ParserInfo> SubExpressionInfo;
+typedef std::vector<SubExpressionInfo> SubExpressions;
+
+/**
+class representing a state for the regular expression engine
+
+@author Lorenzo Bettini
+ */
+struct RegExpState
+{
+ /// the identifier that the next created state will receive
+ static int global_id;
+ const int id; // the identifier of the state
+
+ /// the regular expression (with all the alternatives) for this state
+ boost::regex reg_exp;
+
+ /// for each alternative keep the parser info
+ SubExpressions subExpressions;
+
+ /// the formatters (one for each alternative); position 0 is reserved
+ /// for the default formatter
+ format_vector formatters;
+
+ /// where to buffer the expression strings (added with add_exp)
+ /// when freeze is called these will be used to create the reg_exp
+ std::ostringstream buffer;
+
+ /// if the regular expression is not matched try with this alternative
+ /// state (this is used to split regular expressions where an alternative
+ /// has a back reference, since back references are limited to 9)
+ RegExpStatePtr alternative;
+
+ /**
+ * Records the fact that if an add_exp is invoked on this object,
+ * then an alternative must be created and the exp should be inserted there
+ */
+ bool nextAddMustCreateAnAlternative;
+
+ /**
+ * Means that this state has a regular expression made up of (possibly)
+ * many marked subexpressions each of which is an alternative, e.g.,
+ * (foo)|(#)|...
+ *
+ * This is crucial since, when formatting, we need to inspect each sub_match
+ * of match_result to find out which one matched (and so, which formatter to use).
+ */
+ bool hasMarkedAlternatives;
+
+ /**
+ * Means that this state has a regular expression made up of marked subexpressions
+ * where all of them can match, e.g.,
+ * (class)([[:blank:]]*)([[:alnum:]]+)
+ *
+ * This is crucial since, when formatting, we need to inspect each sub_match of
+ * match_result to find out all those that matched. This is different from the
+ * case of hasMarkedAlternatives: in that case only one can match
+ */
+ bool allAlternativesCanMatch;
+
+ /**
+ * Creates a state with a fresh id, a single (empty) position for the
+ * default formatter, no alternative and all the flags cleared
+ */
+ RegExpState() :
+ id(global_id++), formatters(1), alternative(RegExpStatePtr()),
+ nextAddMustCreateAnAlternative(false),
+ hasMarkedAlternatives(false),
+ allAlternativesCanMatch(false) {}
+
+ /**
+ * @param index the position of a formatter; a negative value selects
+ * the default formatter (position 0)
+ * @return the element name of the selected formatter
+ */
+ const std::string &get_elem(int index = -1);
+
+ /**
+ * Adds the formatter for the given regular expression (and the file info
+ * of the original language definition file).
+ *
+ * The expression is only buffered (i.e., the regular expression is not built
+ * until freeze is called)
+ *
+ * @param s the regular expression string
+ * @param parserInfo
+ * @param f
+ */
+ void add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f);
+
+ /**
+ * Adds the formatters for the given regular expression (and the file info
+ * of the original language definition file). Each formatter of the passed
+ * vector is related to the corresponding marked subexpression of the passed expression.
+ *
+ * The expression is only buffered (i.e., the regular expression is not built
+ * until freeze is called)
+ *
+ * @param s the regular expression string
+ * @param parserInfo
+ * @param f
+ */
+ void add_exp(const std::string &s, ParserInfo *parserInfo, const format_vector &f);
+
+ /**
+ * Basically the same as add_exp, but
+ * 1. creates the alternative RegExpState
+ * 2. forwards all the operations to it from now on
+ */
+ void add_alternative(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f);
+
+ /**
+ * Sets that we added a marked subexpression
+ */
+ void setHasMarkedAlternatives();
+
+ /**
+ * Sets that we added a subexpression where all alternatives can match
+ */
+ void setAllAlternativesCanMatch();
+
+ /**
+ * Adds the formatter for a (marked) subexpression
+ *
+ * @param f
+ */
+ void add_subexp_formatter(RegExpFormatterPtr f);
+
+ /**
+ * Actually build the regular expression from the buffered strings
+ */
+ void freeze() throw(boost::bad_expression);
+
+ /**
+ * The default formatter in case the regular expression is not matched
+ */
+ void set_default_formatter(RegExpFormatterPtr f);
+
+ /**
+ * @return the last formatter
+ */
+ RegExpFormatterPtr getLastFormatter() const;
+
+ /**
+ * @return whether this state has an alternative
+ */
+ bool has_alternative() const;
+};
+
+#endif
diff --git a/src/lib/regexpstatebuilder.H b/src/lib/regexpstatebuilder.H
index 5e3fcb1..c47ddc6 100644
--- a/src/lib/regexpstatebuilder.H
+++ b/src/lib/regexpstatebuilder.H
@@ -1,10 +1,10 @@
//
-// C++ Interface: %{MODULE}
+// C++ Interface: RegExpStateBuilder
//
-// Description:
+// Description: Builds the RegExpStates starting from all the language elements.
+//
//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -34,6 +34,7 @@ build a RegExpState starting from language definitions
class DelimitedLangElem; // file: delimitedlangelem.h
class LangElem; // file: langelem.h
class LangElems; // file: langelems.h
+class NamedSubExpsLangElem; // file: namedsubexpslangelem.h
class RegExpStatePointer; // file: regexpstatebuilder.h
class StateLangElem; // file: statelangelem.h
class StateStartLangElem; // file: statestartlangelem.h
@@ -66,8 +67,10 @@ virtual void build(DelimitedLangElem * elem, RegExpStatePointer state);
#line 50 "regexpstatebuilder.h"
virtual void build(StateStartLangElem * elem, RegExpStatePointer state);
#line 51 "regexpstatebuilder.h"
-virtual void build(LangElem * elem, RegExpStatePointer state);
+virtual void build(NamedSubExpsLangElem * elem, RegExpStatePointer state);
#line 52 "regexpstatebuilder.h"
+virtual void build(LangElem * elem, RegExpStatePointer state);
+#line 53 "regexpstatebuilder.h"
virtual void build(LangElems * elems, RegExpStatePointer state);
public:
void _forward_build(DelimitedLangElem * elem, RegExpStatePointer state)
@@ -85,6 +88,11 @@ void _forward_build(LangElems * elems, RegExpStatePointer state)
build(elems, state);
};
+void _forward_build(NamedSubExpsLangElem * elem, RegExpStatePointer state)
+{
+ build(elem, state);
+};
+
void _forward_build(StateLangElem * elem, RegExpStatePointer state)
{
build(elem, state);
@@ -103,7 +111,7 @@ void _forward_build(StringListLangElem * elem, RegExpStatePointer state)
protected:
virtual void build_DB(LangElem * elem, RegExpStatePointer state);
virtual void build_DB(LangElems * elems, RegExpStatePointer state);
-#line 52 "regexpstatebuilder.h"
+#line 53 "regexpstatebuilder.h"
// doublecpp: end, DO NOT MODIFY
diff --git a/src/lib/regexpstatebuilder.cpp b/src/lib/regexpstatebuilder.cpp
index f98e37f..aeac068 100644
--- a/src/lib/regexpstatebuilder.cpp
+++ b/src/lib/regexpstatebuilder.cpp
@@ -1,10 +1,10 @@
//
-// C++ Implementation: %{MODULE}
+// C++ Implementation: regexpstatebuilder.cpp
//
-// Description:
+// Description: Builds the regexp automaton
//
//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini, 2007, http://www.lorenzobettini.it
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -17,6 +17,7 @@
#include "statelangelem.h"
#include "stringlistlangelem.h"
#include "delimitedlangelem.h"
+#include "namedsubexpslangelem.h"
#include "regexpstate.h"
#include "stringdef.h"
#include "tostringcollection.h"
@@ -27,17 +28,22 @@
using namespace std;
static const string buildex(const string &s);
-static const string buildex_pre(const string &s);
-void add_exp(RegExpStatePtr state, const string &orig, const string &exp, ParserInfo *parserInfo, RegExpFormatterPtr f)
+void add_exp(RegExpStatePtr state, const string &exp, ParserInfo *parserInfo, RegExpFormatterPtr f)
{
- try {
- state->add_exp(exp, parserInfo, f);
- } catch (boost::bad_expression &e) {
- exitError("wrong original expression: " + orig);
- }
-}
+ unsigned int numOfSubexpressions = RegexPreProcessor::num_of_subexpressions(exp);
+ if (numOfSubexpressions) {
+ // for marked subexpressions we must not use buildex, otherwise we might change
+ // the subexpressions indexes (e.g., for backreferences)
+ state->add_alternative(exp, parserInfo, f);
+ } else {
+ state->add_exp(buildex(exp), parserInfo, f);
+
+ // record that we (manually) added an explicit marked subexpression
+ state->setHasMarkedAlternatives();
+ }
+}
/**
* Definitely associate the regular expression to this state
@@ -64,6 +70,22 @@ RegExpStateBuilder::~RegExpStateBuilder()
{
}
+/**
+ * Copies the exit information of the language element into the formatter.
+ * The formatter is left untouched if the element defines no exit statement.
+ *
+ * @param elem the language element providing the exit information
+ * @param formatter the formatter to update
+ */
+void setFormatterExitLevel(StateStartLangElem *elem, RegExpFormatterPtr formatter) {
+  const bool leave_all = elem->exitAll();
+  const bool leave_one = elem->doExit();
+
+  if (leave_all)
+    formatter->exit_all = true;
+
+  // both kinds of exit statement set the exit level to one
+  if (leave_all || leave_one)
+    formatter->exit_state_level = 1;
+}
+
RegExpStatePtr
RegExpStateBuilder::build(LangElems *elems)
{
@@ -93,11 +115,11 @@ RegExpStateBuilder::build(LangElems *elems, RegExpStatePointer state)
// try to find out where the problem is...
RegExpStatePtr temp_state(new RegExpState());
for (LangElems::const_iterator it = elems->begin(); it != elems->end(); ++it) {
- build(*it, temp_state);
+ build_DB(*it, temp_state);
try {
temp_state->freeze();
} catch (boost::bad_expression &e) {
- exitError("problem in this expression: " + (*it)->toString());
+ exitError("problem in this expression: " + (*it)->toStringOriginal(), *it);
}
}
} else {
@@ -111,23 +133,24 @@ RegExpStateBuilder::build(LangElem *elem, RegExpStatePointer state)
}
/**
- * Build a subexpression starting from s
+ * Build a non-marking group (i.e., (?: ... )) starting from s
* @param s
* @return
*/
-const string buildex(const string &s)
+const string non_marking_group(const string &s)
{
- return "(" + s + ")";
+ return "(?:" + s + ")";
}
+
/**
- * Build a subexpression starting from s, after preprocessing s
+ * Build a subexpression starting from s
* @param s
* @return
*/
-const string buildex_pre(const string &s)
+const string buildex(const string &s)
{
- return buildex(RegexPreProcessor::preprocess(s));
+ return "(" + s + ")";
}
/**
@@ -185,17 +208,59 @@ RegExpStateBuilder::build(StringListLangElem *elem, RegExpStatePointer state)
if (!elem->isCaseSensitive())
stringdef = RegexPreProcessor::make_nonsensitive(stringdef);
- string exp_string = buildex_pre(stringdef);
+ string exp_string = non_marking_group(stringdef);
if (isToIsolate)
exp_string = buildex_isolated(exp_string);
RegExpFormatterPtr formatter(new RegExpFormatter(name));
- add_exp(state, exp_string, buildex_pre(exp_string), elem, formatter);
+ add_exp(state, exp_string, elem, formatter);
build(static_cast<StateStartLangElem *>(elem), state);
}
/**
+ * Case of a list of language elements, each representing a
+ * marked subexpression
+ * @param elem
+ * @param state
+ */
+void
+RegExpStateBuilder::build(NamedSubExpsLangElem *elem, RegExpStatePointer state)
+{
+    const ElementNames *names = elem->getElementNames();
+    const StringDef *definition = elem->getRegexpDef();
+    const string expression = definition->toString();
+
+    // validate the expression first: the analysis must report no error and
+    // must find as many marked subexpressions as there are element names
+    subexpressions_info analysis = RegexPreProcessor::num_of_marked_subexpressions(expression);
+
+    if (analysis.errors.size())
+        exitError(analysis.errors, elem);
+
+    if (names->size() != analysis.marked)
+        exitError("number of marked subexpressions does not match number of elements", elem);
+
+    // one formatter for each element name, in the same order
+    format_vector group_formatters;
+    for (ElementNames::const_iterator name = names->begin(); name != names->end(); ++name) {
+        RegExpFormatterPtr group_formatter(new RegExpFormatter(*name));
+        // all the formatters stem from the same matched regexp,
+        // thus they all get the same exit level
+        setFormatterExitLevel(elem, group_formatter);
+        group_formatters.push_back(group_formatter);
+    }
+
+    // register the whole expression together with all its formatters
+    state->add_exp(expression, elem, group_formatters);
+
+    // record that all the subexpressions can match
+    state->setAllAlternativesCanMatch();
+}
+
+/**
* Case of a delimited element
* @param elem
* @param state
@@ -242,10 +307,10 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)
"<(?:[^<>])*>"
*/
if (!escape) {
- exp_string = start_string + "([^" +
+ exp_string = start_string + non_marking_group("[^" +
start_string +
(end_string != start_string ? end_string : "") +
- "])*" + end_string;
+ "]") + "*" + end_string;
} else {
/*
in case of a specified escape character it will use it for the
@@ -255,12 +320,12 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)
<(?:[^\\<\\>]|\\.)*>
*/
- exp_string = start_string + "([^" +
+ exp_string = start_string + non_marking_group("[^" +
escape_string +
start_string +
(end_string != start_string ? escape_string + end_string : "") +
- "]|"+ escape_string + "." +
- ")*" + end_string;
+ "]|"+ escape_string + ".") +
+ "*" + end_string;
}
} else {
/*
@@ -300,7 +365,7 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)
1 + (elem->doExit() ? 1 : 0),
elem->exitAll()));
if (end)
- add_exp(inner, end_string, buildex_pre(end_string), elem, exit);
+ add_exp(inner, end_string, elem, exit);
else
inner->add_exp(buildex("\\z"), elem, exit);
@@ -313,7 +378,7 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)
(\*\])|(\\.)
*/
if (escape) {
- add_exp(inner, escape_string, buildex_pre(escape_string + "."),
+ add_exp(inner, escape_string + ".",
elem,
RegExpFormatterPtr(new RegExpFormatter(name)));
}
@@ -339,16 +404,17 @@ RegExpStateBuilder::build(DelimitedLangElem *elem, RegExpStatePointer state)
if (elem->isNested()) {
RegExpFormatterPtr nested(new RegExpFormatter(name, inner));
nested_formatters.push_back(nested);
- add_exp(inner, start_string, buildex_pre(start_string), elem, nested);
+ add_exp(inner, start_string, elem, nested);
}
}
if (inner) {
- freeze_state(inner);
+ if (!freeze_state(inner))
+ foundBug("bug in expression parsing", __FILE__, __LINE__);;
}
RegExpFormatterPtr formatter(new RegExpFormatter(name, inner));
- add_exp(state, exp_string, buildex_pre(exp_string), elem, formatter);
+ add_exp(state, exp_string, elem, formatter);
build(static_cast<StateStartLangElem *>(elem), state);
}
@@ -363,19 +429,7 @@ RegExpStateBuilder::build(StateStartLangElem *elem, RegExpStatePointer state)
{
RegExpFormatterPtr formatter = state->getLastFormatter();
- bool exit_all = elem->exitAll();
- bool exit = elem->doExit();
-
- /*
- only act on the exit state (if any exist statement is defined)
- */
- if (exit_all) {
- formatter->exit_state_level = 1;
- formatter->exit_all = true;
- }
-
- if (exit)
- formatter->exit_state_level = 1;
+ setFormatterExitLevel(elem, formatter);
}
/**
diff --git a/src/lib/regexpstatebuilder.h b/src/lib/regexpstatebuilder.h
index 3fc05fb..f3ed3ff 100644
--- a/src/lib/regexpstatebuilder.h
+++ b/src/lib/regexpstatebuilder.h
@@ -1,10 +1,10 @@
//
-// C++ Interface: %{MODULE}
+// C++ Interface: RegExpStateBuilder
//
-// Description:
+// Description: Builds the RegExpStates starting from all the language elements.
+//
//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2007
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -48,6 +48,7 @@ class RegExpStateBuilder
void (StringListLangElem *elem, RegExpStatePointer state);
void (DelimitedLangElem *elem, RegExpStatePointer state);
void (StateStartLangElem *elem, RegExpStatePointer state);
+ void (NamedSubExpsLangElem *elem, RegExpStatePointer state);
void (LangElem *elem, RegExpStatePointer state);
void (LangElems *elems, RegExpStatePointer state);
endbranches
diff --git a/src/lib/regexpstatebuilder_dbtab.cc b/src/lib/regexpstatebuilder_dbtab.cc
index 9dd404c..56b58c5 100644
--- a/src/lib/regexpstatebuilder_dbtab.cc
+++ b/src/lib/regexpstatebuilder_dbtab.cc
@@ -4,6 +4,8 @@
#include "statelangelem.h"
+#include "namedsubexpslangelem.h"
+
#include "delimitedlangelem.h"
#include "langelems.h"
@@ -43,6 +45,12 @@ LangElems::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStateP
}
void
+NamedSubExpsLangElem::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStatePointer state)
+{
+ RegExpStateBuilder_o->_forward_build(this, state);
+}
+
+void
StateLangElem::dispatch_build(RegExpStateBuilder *RegExpStateBuilder_o, RegExpStatePointer state)
{
RegExpStateBuilder_o->_forward_build(this, state);
diff --git a/src/lib/regexpstateprinter.cpp b/src/lib/regexpstateprinter.cpp
index 9a56b91..be5abb2 100644
--- a/src/lib/regexpstateprinter.cpp
+++ b/src/lib/regexpstateprinter.cpp
@@ -10,6 +10,7 @@
//
//
#include "regexpstateprinter.h"
+#include "regexpreprocessor.h"
#include <iostream>
@@ -33,21 +34,54 @@ void RegExpStatePrinter::printRegExpState(RegExpStatePtr state)
{
do_indent;
cout << " STATE " << state->id << endl;
+ do_indent;
+ cout << " regexp " << state->reg_exp <<
+ (state->has_alternative() ? " (has alternatives)" : "") << endl;
inc_indent;
- int i = 0;
- for (format_vector::const_iterator it = state->formatters.begin();
- it != state->formatters.end(); ++it)
- {
- do_indent;
- cout << i << ": " << (*it)->elem << " "
- << (i > 0 ? state->subExpressions[i-1].first : "");
- printRegExpFormatter(*it);
- ++i;
+ unsigned int i = 0;
+ if (state->allAlternativesCanMatch) {
+ // print the default formatter
+ do_indent;
+ cout << i << ": " << state->formatters[0]->elem << " ";
+ printRegExpFormatter(state->formatters[0]);
+
+ // we need to get all the subexpressions
+ const subexpressions_strings *split = RegexPreProcessor::split_marked_subexpressions(state->reg_exp.str());
+
+ i = 1;
+ for (subexpressions_strings::const_iterator it = split->begin(); it != split->end(); ++it) {
+ do_indent;
+ cout << i << ": " << state->formatters[i]->elem << " "
+ << *it ;
+ ++i;
+ if (i < state->formatters.size())
+ cout << endl;
+ }
+
+ // and print only the last state which has all the next state
+ // and exit level information
+ do_indent;
+ printRegExpFormatter(state->formatters[i-1]);
+
+ delete split;
+ } else {
+ for (format_vector::const_iterator it = state->formatters.begin();
+ it != state->formatters.end(); ++it)
+ {
+ do_indent;
+ cout << i << ": " << (*it)->elem << " "
+ << (i > 0 ? state->subExpressions[i-1].first : "");
+ printRegExpFormatter(*it);
+ ++i;
+ }
}
dec_indent;
+
+ if (state->alternative.get())
+ printRegExpState(state->alternative);
}
void RegExpStatePrinter::printRegExpFormatter(RegExpFormatterPtr formatter)
diff --git a/src/lib/statelangelem.cpp b/src/lib/statelangelem.cpp
index 254b015..13d4c05 100644
--- a/src/lib/statelangelem.cpp
+++ b/src/lib/statelangelem.cpp
@@ -38,3 +38,11 @@ StateLangElem::toString() const
return res;
}
+/**
+ * Builds the representation of this state element as it was originally
+ * written (i.e., before any preprocessing), which is what error messages
+ * should show.
+ * @return the original representation of the state start element followed,
+ * on a new line, by the one of the contained elements (if any)
+ */
+const std::string
+StateLangElem::toStringOriginal() const
+{
+    // the start element must be printed in its original form too, otherwise
+    // the result mixes a preprocessed start with original contained elements
+    string res = statestartlangelem->toStringOriginal();
+    if (langelems)
+        res += "\n" + langelems->toStringOriginal();
+    return res;
+}
diff --git a/src/lib/statelangelem.h b/src/lib/statelangelem.h
index 003b651..762d647 100644
--- a/src/lib/statelangelem.h
+++ b/src/lib/statelangelem.h
@@ -48,6 +48,8 @@ public:
virtual const std::string toString() const;
+ virtual const std::string toStringOriginal() const;
+
StateStartLangElem *getStateStart() const { return statestartlangelem; }
bool isState() const { return state; }
LangElems *getElems() const { return langelems; }
diff --git a/src/lib/stringdef.cpp b/src/lib/stringdef.cpp
deleted file mode 100644
index e1ba02f..0000000
--- a/src/lib/stringdef.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// C++ Implementation: %{MODULE}
-//
-// Description:
-//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "stringdef.h"
-
-StringDef::StringDef(const char *s) :
- stringdef(s)
-{
-}
-
-StringDef::StringDef(const std::string &s) :
- stringdef(s)
-{
-}
-
-StringDef::~StringDef()
-{
-}
-
-
diff --git a/src/lib/stringdef.h b/src/lib/stringdef.h
index 48ec110..483307e 100644
--- a/src/lib/stringdef.h
+++ b/src/lib/stringdef.h
@@ -1,10 +1,10 @@
//
-// C++ Interface: %{MODULE}
+// C++ Interface: StringDef
//
-// Description:
+// Description: a string definition that is used by all the language elements.
//
//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini, 1999-2007 <http://www.lorenzobettini.it>
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -16,34 +16,55 @@
#include <list>
/**
-represent a string for a language element
+ represent a string for a language element
-@author Lorenzo Bettini
-*/
-class StringDef
-{
- private:
+ @author Lorenzo Bettini
+ */
+class StringDef {
+private:
+ /// the actual content
std::string stringdef;
-
- public:
- StringDef(const char *s);
- StringDef(const std::string &s);
-
- ~StringDef();
-
- const std::string toString() const { return stringdef; }
- static StringDef *concat(const StringDef *s1, const StringDef *s2)
- { return new StringDef(s1->stringdef + s2->stringdef); }
+ /// the original representation (without any preprocessing)
+ std::string orig;
+
+public:
+ /**
+ * constructs a StringDef and store also the original representation
+ * @param s the actual content
+ * @param o the original representation
+ */
+ StringDef(const std::string &s, const std::string &o) :
+ stringdef(s), orig(o) {
+ }
+
+ StringDef(const std::string &s) :
+ stringdef(s) {
+ }
+
+ const std::string toString() const {
+ return stringdef;
+ }
+
+ /**
+  * return the original representation (this is useful for printing errors);
+  * if no original representation was stored (the object was built from the
+  * content only) the content itself is returned, so that the result is
+  * not empty just because the original was never recorded
+  * @return the original representation, or the content if there is none
+  */
+ const std::string toStringOriginal() const {
+     return orig.empty() ? stringdef : orig;
+ }
+
+ /**
+  * builds a new StringDef whose content is the concatenation of the
+  * contents of s1 and s2; the original representations are concatenated
+  * as well (falling back to the content for an operand that has none),
+  * so that they are not lost for error reporting
+  * @param s1 the first part
+  * @param s2 the second part
+  * @return a StringDef allocated with new
+  */
+ static StringDef *concat(const StringDef *s1, const StringDef *s2) {
+     const std::string &first_orig = s1->orig.empty() ? s1->stringdef : s1->orig;
+     const std::string &second_orig = s2->orig.empty() ? s2->stringdef : s2->orig;
+     return new StringDef(s1->stringdef + s2->stringdef, first_orig + second_orig);
+ }
};
typedef std::list<StringDef *> StringDefsBase;
-class StringDefs : public StringDefsBase
-{
- public:
+class StringDefs : public StringDefsBase {
+public:
~StringDefs() {
- for (StringDefsBase::iterator it = begin(); it != end(); ++it)
- delete *it;
+ for (StringDefsBase::iterator it = begin(); it != end(); ++it)
+ delete *it;
}
};
diff --git a/src/lib/stringlistlangelem.cpp b/src/lib/stringlistlangelem.cpp
index 8f0e266..59f5be6 100644
--- a/src/lib/stringlistlangelem.cpp
+++ b/src/lib/stringlistlangelem.cpp
@@ -30,7 +30,13 @@ StringListLangElem::~StringListLangElem()
const std::string
StringListLangElem::toString() const
{
- string res = StateStartLangElem::toString() + " " + toStringCollection<StringDefs>(alternatives);;
+ string res = StateStartLangElem::toString() + " " + toStringCollection<StringDefs>(alternatives);
return res;
}
+/**
+ * @return the same layout produced by toString(), but with the alternatives
+ * printed through toStringOriginalCollection
+ */
+const std::string
+StringListLangElem::toStringOriginal() const
+{
+    return StateStartLangElem::toString() + " " + toStringOriginalCollection<StringDefs>(alternatives);
+}
diff --git a/src/lib/stringlistlangelem.h b/src/lib/stringlistlangelem.h
index 7b3fc8e..90a3baa 100644
--- a/src/lib/stringlistlangelem.h
+++ b/src/lib/stringlistlangelem.h
@@ -38,6 +38,8 @@ public:
virtual ~StringListLangElem();
virtual const std::string toString() const;
+
+ virtual const std::string toStringOriginal() const;
StringDefs *getAlternatives() const { return alternatives; }
bool isCaseSensitive() const { return !nonsensitive; }
diff --git a/src/lib/stylecssparser.yy b/src/lib/stylecssparser.yy
index 86b8659..8ba3697 100644
--- a/src/lib/stylecssparser.yy
+++ b/src/lib/stylecssparser.yy
@@ -23,8 +23,6 @@
#include <iostream>
#include <string>
-#include "my_sstream.h"
-
#include "generatorfactory.h"
#include "colors.h"
#include "keys.h"
@@ -51,6 +49,7 @@ static string bodyBgColor;
extern int stylecsssc_lex() ;
extern FILE *stylecsssc_in ;
+extern int stylecsssc_lex_destroy (void);
/// the global pointer to style constant for a specific element
static StyleConstantsPtr currentStyleConstants;
@@ -79,11 +78,11 @@ static string currentBGColor;
stylefile : { /* allow empty files */ }
| statements
;
-
+
statements : statements statement
| statement
;
-
+
statement : option
;
@@ -109,11 +108,11 @@ option : keylist
// check whether it's the body specification
if (Utils::tolower(key) == "body") {
updateBgColor(currentBGColor);
-
+
// notice that for text style specification for the body, the background
// is assumed for the entire document and not for the normal text
// following the semantics of css
-
+
// avoid adding an empty style definition for normal
if (currentColor != "" || currentStyleConstants->size()) {
if (!generatorFactory->createGenerator(NORMAL, currentColor, "", currentStyleConstants)) {
@@ -205,22 +204,24 @@ void parseCssStyles(const string &path, const string &name, GeneratorFactory *ge
printMessage_noln( "Parsing ", cerr ) ;
printMessage_noln (current_file, cerr);
printMessage( " file ...", cerr ) ;
-
+
bodyBgColor = "";
-
+
yyparse() ;
-
+
bodyBgColor_ = bodyBgColor;
-
+
printMessage( "Parsing done!", cerr ) ;
fclose(stylecsssc_in);
+
+ // release scanner memory
+ stylecsssc_lex_destroy();
}
void
yyerror( char *s )
{
parseStyleError(s);
- exit(EXIT_FAILURE);
}
void updateBgColor(const std::string &c)
diff --git a/src/lib/stylecssscanner.ll b/src/lib/stylecssscanner.ll
index 1806907..b1a6ae3 100644
--- a/src/lib/stylecssscanner.ll
+++ b/src/lib/stylecssscanner.ll
@@ -43,7 +43,6 @@ extern int line ;
#define DEB2(s,s2)
#endif
-
%}
%option prefix="stylecsssc_"
@@ -207,16 +206,6 @@ STRING \"[^\"\n]*\"
DEB2("CSS PROPERTIES discarding", yytext);
}
-<<EOF>> {
- DEB("reached EOF of the style file");
-
- DEB("freeing scanner memory");
- /* For non-reentrant C scanner only. */
- yy_delete_buffer(YY_CURRENT_BUFFER);
-
- yyterminate();
-}
-
<INITIAL>. { return yytext[0] ; }
%%
diff --git a/src/lib/styleparser.yy b/src/lib/styleparser.yy
index a203137..7f000b6 100644
--- a/src/lib/styleparser.yy
+++ b/src/lib/styleparser.yy
@@ -23,8 +23,6 @@
#include <iostream>
#include <string>
-#include "my_sstream.h"
-
#include "generatorfactory.h"
#include "colors.h"
#include "keys.h"
@@ -42,6 +40,7 @@ int line = 1 ;
extern int stylesc_lex() ;
extern FILE *stylesc_in ;
+extern int stylesc_lex_destroy (void);
static string bodyBgColor;
@@ -74,15 +73,15 @@ static GeneratorFactory *generatorFactory;
stylefile : { /* allow empty files */ }
| statements
;
-
+
statements : statements statement
| statement
;
-
+
statement : option
| bodybgcolor
;
-
+
option : keylist color bgcolor
{
printSequence( $1 ) ;
@@ -164,7 +163,7 @@ parseStyles(const string &path, const string &name, GeneratorFactory *genFactory
string &bodyBgColor_)
{
generatorFactory = genFactory;
-
+
// opens the file for yylex
stylesc_in = open_data_file_stream(path, name);
@@ -173,29 +172,33 @@ parseStyles(const string &path, const string &name, GeneratorFactory *genFactory
printMessage_noln( "Parsing ", cerr ) ;
printMessage_noln (current_file, cerr);
printMessage( " file ...", cerr ) ;
-
+
bodyBgColor = "";
-
+
yyparse() ;
-
+
bodyBgColor_ = bodyBgColor;
-
+
printMessage( "Parsing done!", cerr ) ;
fclose(stylesc_in);
+
+ // free scanner memory
+ stylesc_lex_destroy();
}
void
yyerror( char *s )
{
parseStyleError(s);
- exit(EXIT_FAILURE);
}
-void parseStyleError(const std::string &error)
+void parseStyleError(const std::string &error, bool exit)
{
- ostringstream str ;
- str << current_file << ":" << line << ": " << error;
- printError( str.str(), cerr ) ;
+ if (exit)
+ exitError(current_file, line, error);
+ else {
+ printError(current_file, line, error);
+ }
}
void updateBgColor(const std::string *c)
@@ -204,7 +207,7 @@ void updateBgColor(const std::string *c)
yyerror("bgcolor already defined");
else
bodyBgColor = *c;
-
+
// we don't need it anymore
delete c;
}
diff --git a/src/lib/stylescanner.ll b/src/lib/stylescanner.ll
index c5b5c19..0baf33f 100644
--- a/src/lib/stylescanner.ll
+++ b/src/lib/stylescanner.ll
@@ -60,7 +60,7 @@ IDE [a-zA-Z_]([a-zA-Z0-9_])*
STRING \"[^\"\n]*\"
-%s COMMENT_STATE STRING_STATE
+%s COMMENT_STATE STRING_STATE
%%
@@ -76,7 +76,7 @@ STRING \"[^\"\n]*\"
<INITIAL>\#[a-fA-F0-9]{6} {
const std::string wrong = yytext ;
- parseStyleError("use of direct colors has changed");
+ parseStyleError("use of direct colors has changed", false);
parseStyleError("use double quoted syntax: \"" + wrong + "\" instead of " + wrong);
exit(EXIT_FAILURE);
return COLOR ;
@@ -108,16 +108,6 @@ STRING \"[^\"\n]*\"
\n { ++line ; }
-<<EOF>> {
- DEB("reached EOF of the style file");
-
- DEB("freeing scanner memory");
- /* For non-reentrant C scanner only. */
- yy_delete_buffer(YY_CURRENT_BUFFER);
-
- yyterminate();
-}
-
<INITIAL>. { /* anything else will generate a parsing error */ return yytext[0] ; }
%%
diff --git a/src/lib/test_langinfer.cpp b/src/lib/test_langinfer.cpp
index b683cf3..a4c8994 100644
--- a/src/lib/test_langinfer.cpp
+++ b/src/lib/test_langinfer.cpp
@@ -43,6 +43,10 @@ main()
testInfer("#!/bin/sh\n# -*- tcl, as specified in Emacs -*-", "tcl");
+ // try with the env specification
+ testInfer("#! /usr/bin/env python", "python");
+ testInfer("#! /bin/env perl", "perl");
+
// just a small check for tolower
string s = "Lisp";
Utils::toLower(s);
diff --git a/src/lib/test_regexpreprocessor_main.cpp b/src/lib/test_regexpreprocessor_main.cpp
new file mode 100644
index 0000000..daa5a9d
--- /dev/null
+++ b/src/lib/test_regexpreprocessor_main.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2007 Lorenzo Bettini <http://www.lorenzobettini.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * This program is part of GNU source-highlight
+ *
+ * This tests regex preprocessing
+ */
+
+#include <iostream>
+#include <boost/regex.hpp>
+#include <algorithm>
+
+#include "asserttest.h"
+#include "regexpreprocessor.h"
+
+using namespace std;
+
+// variable to test results for all the tests
+int result = 0;
+
+void testPreprocess(const string &original, const string &expected) {
+ cout << "original : " << original << endl;
+
+ const string &preprocessed = RegexPreProcessor::preprocess(original);
+
+ cout << "preprocessed : " << preprocessed << endl;
+ result += assertEquals(expected, preprocessed);
+}
+
+void testMakeNonSensitive(const string &original, const string &expected) {
+ cout << "original : " << original << endl;
+
+ const string &preprocessed = RegexPreProcessor::make_nonsensitive(original);
+
+ cout << "preprocessed : " << preprocessed << endl;
+ result += assertEquals(expected, preprocessed);
+}
+
+void testOnlyNumOfMarkedSubexpressions(const string &original,
+ unsigned int expected) {
+ cout << "original : " << original << endl;
+
+ unsigned int found = RegexPreProcessor::num_of_subexpressions(original);
+
+ cout << "found : " << found << endl;
+ result += assertEquals(expected, found);
+}
+
+void testNumOfMarkedSubexpressions(const string &original,
+ unsigned int expected, const string &error = "") {
+ cout << "original : " << original << endl;
+
+ subexpressions_info
+ sexp = RegexPreProcessor::num_of_marked_subexpressions(original);
+ unsigned int found = sexp.marked;
+
+ cout << "found : " << found << endl;
+ if (sexp.errors.size())
+ cout << "error : " << sexp.errors << endl;
+ result += assertEquals(expected, found);
+ result += assertEquals(error, sexp.errors);
+}
+
+void testBackReference(const string &original, bool expected) {
+ cout << "original : " << original << endl;
+
+ bool found = RegexPreProcessor::contains_backreferences(original);
+
+ cout << "found : " << found << endl;
+ result += assertEquals(expected, found);
+}
+
+/**
+ * Splits original into its marked subexpressions and checks that the
+ * result is exactly the expected sequence (same length, same elements);
+ * on mismatch the global result counter is incremented.
+ *
+ * @param original the regular expression to split
+ * @param expected the expected subexpressions, in order
+ */
+void testSplit(const string &original, const subexpressions_strings &expected) {
+    // the collection is heap allocated and must be released by the caller
+    const subexpressions_strings *split =
+        RegexPreProcessor::split_marked_subexpressions(original);
+    cout << "split : ";
+    std::copy(split->begin(), split->end(), std::ostream_iterator<string>(cout));
+    cout << endl;
+    // the three-iterator std::equal assumes that the second range is at
+    // least as long as the first one and ignores any extra element in it,
+    // so the lengths must be compared explicitly
+    if (split->size() != expected.size() ||
+        !std::equal(split->begin(), split->end(), expected.begin())) {
+        ++result;
+        cout << "are not equal!" << endl;
+        cout << "expected : ";
+        std::copy(expected.begin(), expected.end(),
+            std::ostream_iterator<string>(cout));
+        cout << endl;
+    }
+
+    delete split;
+}
+
+int main() {
+ cout << boolalpha;
+
+ testPreprocess("simple", "simple");
+ testPreprocess("(inside)", "(?:inside)");
+ testPreprocess("(dou(b)le)", "(?:dou(?:b)le)");
+
+ testMakeNonSensitive("foo", "[Ff][Oo][Oo]");
+
+ testOnlyNumOfMarkedSubexpressions("none", 0);
+ testOnlyNumOfMarkedSubexpressions("just (one)", 1);
+ testOnlyNumOfMarkedSubexpressions("(3 of (them)) just (one)", 3);
+
+ testOnlyNumOfMarkedSubexpressions("none \\(", 0);
+ testOnlyNumOfMarkedSubexpressions("(?: again) none \\(", 0);
+
+ testNumOfMarkedSubexpressions("none", 0,
+ subexpressions_info::ERR_OUTSIDE_SUBEXP);
+ testNumOfMarkedSubexpressions("just (one)", 0,
+ subexpressions_info::ERR_OUTSIDE_SUBEXP);
+ testNumOfMarkedSubexpressions("(3 of (them)) just (one)", 1,
+ subexpressions_info::ERR_NESTED_SUBEXP);
+
+ testNumOfMarkedSubexpressions("none \\(", 0,
+ subexpressions_info::ERR_OUTSIDE_SUBEXP);
+ testNumOfMarkedSubexpressions("(?: again) none \\(", 0,
+ subexpressions_info::ERR_OUTER_UNMARKED);
+
+ testNumOfMarkedSubexpressions("(just one)", 1);
+ testNumOfMarkedSubexpressions("(just one (?:some) and unmarked)", 1);
+ testNumOfMarkedSubexpressions("(just one \\( and escapes)", 1);
+ testNumOfMarkedSubexpressions("(just one \\( and \\) escapes)", 1);
+ testNumOfMarkedSubexpressions("(one) ", 1,
+ subexpressions_info::ERR_OUTSIDE_SUBEXP);
+
+ testNumOfMarkedSubexpressions("(one", 1,
+ subexpressions_info::ERR_UNBALANCED_PAREN);
+ testNumOfMarkedSubexpressions("(one))", 1,
+ subexpressions_info::ERR_UNBALANCED_PAREN);
+
+ testNumOfMarkedSubexpressions("(one)(two)((?:three)*)", 3);
+ testNumOfMarkedSubexpressions("(one) (two)", 1,
+ subexpressions_info::ERR_OUTSIDE_SUBEXP);
+
+ subexpressions_strings expected;
+ expected.push_back("(this)");
+ expected.push_back("(is)");
+ expected.push_back("(one)");
+ testSplit("(this)(is)(one)", expected);
+
+ expected.clear();
+ expected.push_back("(this)");
+ expected.push_back("(contains \\( some \\) other parenthesis)");
+ expected.push_back("(and (?:non marked) ones)");
+ testSplit("(this)(contains \\( some \\) other parenthesis)(and (?:non marked) ones)", expected);
+
+ testBackReference("this does not contain any", false);
+ testBackReference("this does contain \\1 one", true);
+ testBackReference("and also this one (?(2)...) does", true);
+ testBackReference("while this one (?(foo)...) does NOT does", false);
+
+ return result;
+}
diff --git a/src/lib/textformatter.cpp b/src/lib/textformatter.cpp
index bf681fb..cf87460 100644
--- a/src/lib/textformatter.cpp
+++ b/src/lib/textformatter.cpp
@@ -4,42 +4,334 @@
// Description:
//
//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007
//
// Copyright: See COPYING file that comes with this distribution
//
//
+
#include "textformatter.h"
+#include "textgenerator.h"
+#include "preformatter.h"
+#include "mainoutputbuffer.h"
+#include "messages.h"
+#include "fileutil.h"
+#include "fileinfo.h"
-#include "maingeneratormap.h"
+#include <boost/regex.hpp>
+#include <list>
using namespace std;
-TextFormatter::TextFormatter()
-{
+/**
+ * @param elem the string to inspect
+ * @return true if elem contains no space character, false otherwise
+ */
+bool isTaggable(const string &elem) {
+    for (string::const_iterator c = elem.begin(); c != elem.end(); ++c) {
+        if (*c == ' ')
+            return false;
+    }
+    return true;
+}
+
+static boost::regex string_or_space_regex("([^[:blank:]]+)|([[:blank:]]+)");
+static SubstitutionMapping ref_substitutionmapping;
+
+#define SPACE 2
+#define NOT_SPACE 1
+
+/**
+ * Builds a formatter that does not generate references.
+ * @param pf the preformatter applied to the text before it is generated
+ */
+TextFormatter::TextFormatter(PreFormatter *pf) :
+    default_generator(0), preformatter(pf),
+    // flush() reads current_file_info, so it must never be left uninitialized
+    current_file_info(0),
+    noOptimizations(false), generateReferences(false), ctags_file(0) {
+}
+
+/**
+ * Builds a formatter that also generates references, using the
+ * specified tag file; exits with an error if the tag file cannot be opened.
+ * @param pf the preformatter applied to the text before it is generated
+ * @param _ctags_file_name the name of the tag file to open
+ * @param r the styles used to generate anchors and references
+ * @param pos where the references are generated
+ */
+TextFormatter::TextFormatter(PreFormatter *pf, const string &_ctags_file_name,
+    const TextStyles::RefTextStyle &r, RefPosition pos) :
+    default_generator(0), preformatter(pf),
+    // flush() reads current_file_info, so it must never be left uninitialized
+    current_file_info(0),
+    noOptimizations(false), generateReferences(true),
+    ctags_file_name(_ctags_file_name), refstyle(r), refposition(pos),
+    ctags_file(0) {
+    ctags_file = tagsOpen(ctags_file_name.c_str(), &info);
+    if (ctags_file == 0) {
+        exitError("cannot open tag file: " + ctags_file_name);
+    }
+}
+
+TextFormatter::~TextFormatter() {
+ for (MapType::const_iterator it = textformatter.begin(); it != textformatter.end(); ++it)
+ delete it->second;
+
+ if (ctags_file)
+ tagsClose(ctags_file);
+}
+
+void TextFormatter::setDefaultGenerator(TextGenerator *gen) {
+ default_generator = gen;
}
-TextFormatter::~TextFormatter()
-{
+void TextFormatter::addGenerator(const std::string &elem, TextGenerator *gen) {
+ textformatter[elem] = gen;
}
-void
-TextFormatter::format(const string &elem, const string &text, const FileInfo *p)
-{
- if (! text.size())
- return;
+/**
+ * Looks up the generator registered for an element, without creating one.
+ * @param elem the element name
+ * @return the registered generator, or 0 if there is none
+ */
+TextGenerator *TextFormatter::hasGenerator(const string &elem) {
+    MapType::const_iterator found = textformatter.find(elem);
+    return (found != textformatter.end()) ? found->second : 0;
+}
+
+TextGenerator *TextFormatter::getGenerator(const string &elem) {
+ MapType::const_iterator it = textformatter.find(elem);
+ if (it == textformatter.end()) {
+ // create a copy of the prototype and substitute the style.
+ TextGenerator *missing = new TextGenerator(*default_generator);
+ missing->subst_style(elem);
+
+ textformatter[elem] = missing;
+ return missing;
+ }
- generatormap->generateEntire(elem, text, p);
+ return it->second;
}
-void
-TextFormatter::format_nl(const string &text)
-{
- generatormap->generateNL(text);
+void TextFormatter::addNoReference(const std::string &elem) {
+ noreferences.insert(elem);
+}
+
+/**
+ * @param elem the element name
+ * @return true if elem was registered through addNoReference
+ */
+bool TextFormatter::isNoReference(const std::string &elem) const {
+    return noreferences.count(elem) > 0;
+}
+
+/**
+ * Generates the formatted string for an element, adding reference
+ * information only when references are enabled and the element is not
+ * among those excluded from references.
+ */
+const string TextFormatter::generateString(const std::string &elem,
+    const std::string &s, const FileInfo *p) {
+    if (generateReferences && !isNoReference(elem))
+        return generateStringAndRef(elem, s, p);
+
+    return generateStringNoRef(elem, s);
+}
+
+const string TextFormatter::generateStringNoRef(const std::string &elem,
+ const std::string &s) {
+ return getGenerator(elem)->generateEntire(preformatter->preformat(s));
+}
+
+void TextFormatter::generateEntire(const std::string &elem,
+ const std::string &s, const FileInfo *p) {
+ if (noOptimizations) {
+ // optimizations are off: we generate the element right now, since during debugging
+ // we want to be very responsive (an empty string produces no output at all)
+ if (s.size())
+ output(generateString(elem, s, p));
+
+ return;
+ }
+
+ // otherwise we optimize output generation: delay formatting a specific
+ // element until we deal with another element; this way strings that belong
+ // to the same element are formatted using only one tag: e.g.,
+ // <comment>/* mycomment */</comment>
+ // instead of
+ // <comment>/*</comment><comment>mycomment</comment><comment>*/</comment>
+ if (elem == current_elem) {
+ elem_buffer << s;
+ } else {
+ // first format the buffered string, which belongs to current_elem; NOTE(review): it is generated with the new element's FileInfo p, while flush() uses current_file_info -- confirm this is intended
+ const string toformat = elem_buffer.str();
+ if (toformat.size())
+ output(generateString(current_elem, toformat, p));
+
+ // then start a new buffer for the new element, remembering its name and its FileInfo
+ elem_buffer.str("");
+ elem_buffer << s;
+ current_elem = elem;
+ current_file_info = p;
+ }
+}
+
+void TextFormatter::generateNL(const std::string &text) {
+ // first format the buffered string
+ flush();
+
+ string preformat_text = preformatter->preformat(text);
+
+ if (preformat_text == text)
+ preformat_text = "\n";
+
+ outputbuffer->output_ln(preformat_text);
+}
+
+void TextFormatter::flush() {
+ const string &remainder = elem_buffer.str();
+ if (remainder.size()) {
+ output(generateString(current_elem, remainder, current_file_info));
+ elem_buffer.str("");
+ current_elem = "";
+ // each line is handled separately
+ }
+}
+
+void TextFormatter::output(const string &s) {
+ outputbuffer->output(s);
+}
+
+//#define DEBUGREF
+#ifdef DEBUGREF
+#include <iostream>
+#define DEB(x) cerr << x << endl;
+#define DEB2(x) cerr << x ;
+#else
+#define DEB(x) ;
+#define DEB2(x) ;
+#endif
+
+/*
+ * separates a line in block of spaces and block of non spaces.
+ * the stringbuffer tokens stores the pieces seen so far for which no
+ * entry in the tag file was found.
+ *
+ * for each block of non spaces tries to look for an entry in the tag file.
+ * if it finds it flushes the stringbuffer tokens (by passing its contents
+ * to generateStringNoRef).
+ *
+ * For instance (notice the spaces among the +)
+ * "myfield + myfield2 + myfield3
+ * if only an entry for myfield2 is found, then we will generate
+ * 3 blocks:
+ * "myfield + "
+ * "myfield2"
+ * " + myfield3"
+ */
+const std::string TextFormatter::generateStringAndRef(const std::string& elem,
+ const std::string& s, const FileInfo* fileinfo) {
+ buffer.str("");
+ ostringstream tokens;
+
+ boost::sregex_iterator i(s.begin(), s.end(), string_or_space_regex);
+ boost::sregex_iterator j;
+ while (i != j) {
+ if ((*i)[SPACE].matched) {
+ tokens << string((*i)[SPACE].first, (*i)[SPACE].second);
+ } else {
+ string not_spaces = string((*i)[NOT_SPACE].first, (*i)[NOT_SPACE].second);
+ string found_refs = generateRefInfo(elem, not_spaces, fileinfo);
+ if (found_refs.size()) {
+ const string &previous = tokens.str();
+ if (previous.size()) {
+ buffer << generateStringNoRef(elem, previous);
+ tokens.str("");
+ }
+ buffer << found_refs;
+ } else {
+ tokens << not_spaces;
+ }
+ }
+
+ ++i;
+ }
+
+ const string &remainder = tokens.str();
+ if (remainder.size()) {
+ buffer << generateStringNoRef(elem, remainder);
+ }
+
+ return buffer.str();
+}
+
+struct RefEntry {
+ string filename;
+ unsigned long linenumber;
+};
+
+const string TextFormatter::generateRefInfo(const std::string& elem,
+ const std::string& s, const FileInfo* fileinfo) {
+ tagEntry entry;
+ bool found = false; // whether we found a tag
+ bool found_anchor = false; // whether we found an anchor
+ string output;
+ typedef list<RefEntry> FoundRefList;
+ FoundRefList foundreflist;
+
+ DEB("inspecting " + s)
+
+ if (tagsFind(ctags_file, &entry, s.c_str(), TAG_FULLMATCH)== TagSuccess) {
+ found = true;
+ do {
+ RefEntry refentry;
+ refentry.filename = entry.file;
+ if ((refentry.filename == fileinfo->filename ||
+ refentry.filename == fileinfo->input_file_name) &&entry.address.lineNumber == fileinfo->line) {
+ ostringstream gen_info;
+ // we just found the reference to this very element
+ // so we must generate an anchor
+ gen_info << entry.address.lineNumber;DEB(" found anchor " + gen_info.str());
+ ref_substitutionmapping["$text"] = preformatter->preformat(s);
+ ref_substitutionmapping["$infilename"] = strip_file_path(refentry.filename);
+ ref_substitutionmapping["$infile"] = refentry.filename;
+ ref_substitutionmapping["$linenum"] = gen_info.str();
+ output = refstyle.anchor.output(ref_substitutionmapping);
+ found_anchor = true;
+ break;
+ }
+
+ DEB2(" found " + string(entry.name) + " : ");DEB(entry.address.lineNumber);
+
+ refentry.linenumber = entry.address.lineNumber;
+ foundreflist.push_back(refentry);
+ } while (tagsFindNext(ctags_file, &entry)== TagSuccess);
+ }
+
+ if (found) {
+ if (! found_anchor) {
+ ref_substitutionmapping["$text"] = preformatter->preformat(s);
+ TextStyle *referencestyle = 0;
+ if ((foundreflist.size()>1 && refposition == INLINE) || refposition == POSTLINE)
+ referencestyle = &(refstyle.postline_reference);
+ else if (refposition == POSTDOC)
+ referencestyle = &(refstyle.postdoc_reference);
+ else
+ referencestyle = &(refstyle.inline_reference);
+
+ for (FoundRefList::const_iterator it = foundreflist.begin(); it != foundreflist.end(); ++it) {
+ ostringstream gen_info;
+ // we found where this element appears so we generate a reference
+ // if it's a link in the same file, we use the output_file_name...
+ if (it->filename == fileinfo->filename || it->filename == fileinfo->input_file_name)
+ gen_info << fileinfo->output_file_name;
+ else
+ gen_info << it->filename << fileinfo->output_file_extension;
+ // ...otherwise we build the referenced file by using the output_file_extension
+ // in fact, in this case, it probably means that multiple input files have been specified
+
+ ref_substitutionmapping["$outfile"] = gen_info.str();
+ ref_substitutionmapping["$infilename"] = strip_file_path(it->filename);
+ ref_substitutionmapping["$infile"] = it->filename;
+
+ gen_info.str("");
+ gen_info << it->linenumber;
+ ref_substitutionmapping["$linenum"] = gen_info.str();
+ output += referencestyle->output(ref_substitutionmapping);
+
+ // if the following is true, it means that there is more than one reference
+ if (foundreflist.size()> 1 || refposition != INLINE) {
+ output += preformatter->preformat("\n");
+
+ if (refposition == POSTLINE || refposition == INLINE) {
+ outputbuffer->output_postline(output);
+ } else { // (refposition == POSTDOC)
+ outputbuffer->output_post(output);
+ }
+
+ output = ""; // no need to modify the current element
+ }
+ }
+ }
+ }
+
+ return output;
+}
+
+void TextFormatter::format(const string &elem, const string &text,
+ const FileInfo *p) {
+ if (! text.size())
+ return;
+
+ generateEntire(elem, text, p);
}
-void
-TextFormatter::flush()
-{
- generatormap->flush();
+void TextFormatter::format_nl(const string &text) {
+ generateNL(text);
}
diff --git a/src/lib/textformatter.h b/src/lib/textformatter.h
index 3d6230c..ec0bf6e 100644
--- a/src/lib/textformatter.h
+++ b/src/lib/textformatter.h
@@ -9,33 +9,119 @@
// Copyright: See COPYING file that comes with this distribution
//
//
-#ifndef TEXTFORMATTER_H
-#define TEXTFORMATTER_H
+#ifndef GENERATORMAP_H
+#define GENERATORMAP_H
+#include <map>
#include <string>
-#include <boost/shared_ptr.hpp>
+#include <set>
+#include "my_sstream.h"
+#include "readtags.h"
+#include "textstyles.h"
+#include "refposition.h"
+
+class TextGenerator;
+class PreFormatter;
+class FileInfo;
/**
-format text
+ * Formats the elements of a source file, using a map of generators;
+ * associate a generator for each program element, e.g., keyword, string, etc.
+ *
+ * @author Lorenzo Bettini
+ */
+class TextFormatter {
+protected:
+ typedef std::map<std::string, TextGenerator *> MapType;
+ typedef std::set<std::string> NoRefType;
+ MapType textformatter;
+ /// those elements for which no reference info is generated
+ NoRefType noreferences;
+ TextGenerator *default_generator;
+ PreFormatter *preformatter;
+ /// where we buffer strings for the current elem
+ std::ostringstream elem_buffer;
+ /// the element that is currently buffered
+ std::string current_elem;
+ /// concerns the element currently buffered
+ const FileInfo *current_file_info;
+ /// whether to turn off optimizations (such as buffering), default: false
+ bool noOptimizations;
-@author Lorenzo Bettini
-*/
+ bool generateReferences;
-class FileInfo;
+ // for references
+
+ const std::string ctags_file_name;
+ TextStyles::RefTextStyle refstyle;
+ RefPosition refposition;
+ tagFile *ctags_file;
+ tagFileInfo info;
+ std::ostringstream buffer;
+
+ const std::string generateString(const std::string &elem,
+ const std::string &s, const FileInfo *);
+
+ const std::string generateStringAndRef(const std::string &elem,
+ const std::string &s, const FileInfo *);
+
+ const std::string generateStringNoRef(const std::string &elem,
+ const std::string &s);
+
+ /**
+ * Generates the reference information
+ */
+ const std::string generateRefInfo(const std::string& elem,
+ const std::string& s, const FileInfo* arg1);
+
+ /**
+ * Actually performs the output through an OutputBuffer
+ */
+ void output(const std::string &s);
+
+ void generateEntire(const std::string &elem, const std::string &s,
+ const FileInfo *);
+ void generateNL(const std::string &s);
-class TextFormatter
-{
public:
- TextFormatter();
+ TextFormatter(PreFormatter *);
- ~TextFormatter();
+ TextFormatter(PreFormatter *pf, const std::string &_ctags_file_name,
+ const TextStyles::RefTextStyle &r, RefPosition pos);
- void format(const std::string &elem, const std::string &text,
- const FileInfo *);
- void format_nl(const std::string &text = "\n");
- void flush();
-};
+ ~TextFormatter();
+
+ /**
+ * Returns the generator for the specific element name or null if
+ * there's no generator for the element
+ * @param elem
+ * @return
+ */
+ TextGenerator *hasGenerator(const std::string &elem);
+
+ /**
+ * Retrieves the generator for a specific element; if it doesn't find it,
+ * it creates a generator for that element, using the default generator
+ * (i.e., the one for "normal" element)
+ * @param elem
+ * @return
+ */
+ TextGenerator *getGenerator(const std::string &elem);
+ void addGenerator(const std::string &elem, TextGenerator *gen);
+ void addNoReference(const std::string &elem);
+ bool isNoReference(const std::string &elem) const;
-typedef boost::shared_ptr<TextFormatter> TextFormatterPtr;
+ void setDefaultGenerator(TextGenerator *g);
+
+ void flush();
+
+ void setNoOptimizations(bool n) {
+ noOptimizations = n;
+ }
+
+ void format(const std::string &elem, const std::string &text,
+ const FileInfo *);
+ void format_nl(const std::string &text = "\n");
+};
#endif
diff --git a/src/lib/tostringcollection.h b/src/lib/tostringcollection.h
index 99b753d..a4b20f6 100644
--- a/src/lib/tostringcollection.h
+++ b/src/lib/tostringcollection.h
@@ -1,10 +1,8 @@
//
-// C++ Interface: %{MODULE}
+// Description: given a collection generates a string representation
//
-// Description:
//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
+// Author: Lorenzo Bettini, 1999-2007 <http://www.lorenzobettini.it>
//
// Copyright: See COPYING file that comes with this distribution
//
@@ -16,6 +14,13 @@
#include <string>
#include "my_sstream.h"
+/**
+ * Converts a collection of objects with method toString into a string,
+ * using the passed separator to separate the elements.
+ *
+ * @param collection
+ * @param sep
+ */
template <class T>
const std::string toStringCollection(const T *collection, char sep = ' ')
{
@@ -32,4 +37,51 @@ const std::string toStringCollection(const T *collection, char sep = ' ')
return buf.str();
}
+/**
+ * Converts a collection of objects with method toStringOriginal into a string,
+ * using the passed separator to separate the elements.
+ *
+ * @param collection
+ * @param sep
+ */
+template <class T>
+const std::string toStringOriginalCollection(const T *collection, char sep = ' ')
+{
+ std::ostringstream buf;
+
+ for (typename T::const_iterator it = collection->begin();
+ it != collection->end(); )
+ {
+ buf << (*it)->toStringOriginal();
+ if (++it != collection->end())
+ buf << sep;
+ }
+
+ return buf.str();
+}
+
+/**
+ * Converts a collection of objects into a string,
+ * using the passed separator to separate the elements.
+ *
+ * @param collection
+ * @param sep
+ */
+template <class T>
+const std::string collectionToString(const T *collection, char sep = ' ')
+{
+ std::ostringstream buf;
+
+ for (typename T::const_iterator it = collection->begin();
+ it != collection->end(); )
+ {
+ buf << (*it);
+ if (++it != collection->end())
+ buf << sep;
+ }
+
+ return buf.str();
+}
+
+
#endif // TOSTRINGCOLLECTION_H
diff --git a/src/log.lang b/src/log.lang
index af1611e..6a7b946 100644
--- a/src/log.lang
+++ b/src/log.lang
@@ -39,9 +39,7 @@ ip = $ip
string = "root","failure"
-state normal = '(port|pid)[[:blank:]]' begin
- port = '[[:digit:]]+' exit
-end
+(normal,port) = `((?:port|pid)[[:blank:]])([[:digit:]]+)`
state normal start '[[:blank:]](?=(IN|OUT)=)' begin
state normal = '(IN|OUT|PROTO)=(?=[^[:blank:]]+)' begin
diff --git a/src/logtalk.lang b/src/logtalk.lang
index d9b5405..b2c03ff 100644
--- a/src/logtalk.lang
+++ b/src/logtalk.lang
@@ -55,14 +55,14 @@ variable = '\<[A-Z_][A-Za-z0-9_]*'
cbracket = "{|}"
-preproc = '^[[:blank:]]*:-[[:blank:]](ca(lls|tegory) | p(ublic|r(ot(ocol|ected)|ivate)) | e(ncoding|xports) | in(fo|itialization) | alias | d(ynamic|iscontiguous) | meta_predicate | m(etapredicate|od(e|ule)|ultifile) | o(bject|p) | use(s|_module))(?=\()',
- '^[[:blank:]]*:-[[:blank:]](end_(category|object|protocol) | dynamic)\.'
+preproc = '^[[:blank:]]*:-[[:blank:]](ca(lls|tegory)|p(ublic|r(ot(ocol|ected)|ivate))|e(ncoding|xports)|in(fo|itialization)|alias|d(ynamic|iscontiguous)|meta_predicate|m(etapredicate|od(e|ule)|ultifile)|o(bject|p)|use(s|_module))(?=\()',
+ '^[[:blank:]]*:-[[:blank:]](end_(category|object|protocol)|dynamic)\.'
preproc = '\<(extends|i(nstantiates|mp(lements|orts))|specializes)(?=\()'
normal = '\<[a-z][A-Za-z0-9_]*'
-number = '0\'[A-Za-z0-9] | 0b[0-1]+ | 0o[0-7]+ | 0x[0-9a-fA-F]+ | [0-9]+(\.[0-9]+)?([eE]([-+])?[0-9]+)?'
+number = '0\'[A-Za-z0-9]|0b[0-1]+|0o[0-7]+|0x[0-9a-fA-F]+|[0-9]+(\.[0-9]+)?([eE]([-+])?[0-9]+)?'
symbol = "::", "^^",
">>", "<<", "/\\", "\\/", "\\",
diff --git a/src/outlang.map b/src/outlang.map
index b8907e5..9a0ced6 100644
--- a/src/outlang.map
+++ b/src/outlang.map
@@ -16,4 +16,5 @@ latexcolor = latexcolor.outlang
latexcolor-doc = latexcolordoc.outlang
texinfo = texinfo.outlang
javadoc = javadoc.outlang
-docbook = docbook.outlang \ No newline at end of file
+docbook = docbook.outlang
+docbook-doc = docbookdoc.outlang \ No newline at end of file
diff --git a/src/perl.lang b/src/perl.lang
index 8c3672b..00d1011 100644
--- a/src/perl.lang
+++ b/src/perl.lang
@@ -1,19 +1,117 @@
preproc = "import"
+# these might be unreadable but I don't know how else to do that...
+regexp = 's\{(\\\}|[^}])*\}\{(\\\}|[^}])*\}[ixsmogce]*'
+regexp = 's\((\\\)|[^)])*\)\((\\\)|[^)])*\)[ixsmogce]*'
+regexp = 's\[(\\\]|[^\]])*\]\[(\\\]|[^\]])*\][ixsmogce]*'
+regexp = 's<.*><.*>[ixsmogce]*'
+
+# the last (lookahead) expression is used to deal with # used
+# as a delimiter.
+# otherwise, with a line such as
+# s#foo\###; # my comment
+# the # of the comment would be matched as the closing delimiter
+regexp = `s([^[:alnum:][:blank:]]).*\1.*\1[ixsmogce]*(?=[[:blank:]]*(\)|;))`
+
+# this is to deal with cases where the delimiters for the first and the
+# second part are not the same (and spaces are allowed between the first
+# closing and the second opening)
+regexp = `s([^[:alnum:][:blank:]]).*\1[[:blank:]]*([^[:alnum:][:blank:]]).*\2[ixsmogce]*(?=[[:blank:]]*(\)|;))`
+
include "script_comment.lang"
include "number.lang"
+# this won't work if # has something (non blank) before
+vardef comment_in_exp = '[[:blank:]]+#.*'
+
+vardef var_in_exp = '\$([[:word:]]+|\{[[:word:]]+\})'
+
+# this is to highlight correctly regular expressions
+# (and don't mix them with { } code blocks)
+environment keyword = '(m|qr)(?=\{)' begin
+ environment regexp = '\{' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\\{|\\\}'
+ regexp = "}" exitall
+ end
+end
+
+# repeat for other non alpha numerical chars
+environment keyword = '(m|qr)(?=#)' begin
+ environment regexp = '#' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\#'
+ regexp = "#" exitall
+ end
+end
+
+environment keyword = '(m|qr)(?=\|)' begin
+ environment regexp = '\|' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\\|'
+ regexp = "\|" exitall
+ end
+end
+
+environment keyword = '(m|qr)(?=@)' begin
+ environment regexp = '@' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\@'
+ regexp = "@" exitall
+ end
+end
+
+environment keyword = '(m|qr)(?=<)' begin
+ environment regexp = '<' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\<|\\>'
+ regexp = ">" exitall
+ end
+end
+
+environment keyword = '(m|qr)(?=\[)' begin
+ environment regexp = '\[' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\]'
+ regexp = "]" exitall
+ end
+end
+
+environment keyword = '(m|qr)(?=\\)' begin
+ environment regexp = '\\' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\\\'
+ regexp = "\\" exitall
+ end
+end
+
+environment keyword = '(m|qr)(?=/)' begin
+ environment regexp = '/' begin
+ comment = $comment_in_exp
+ variable = $var_in_exp
+ regexp = '\\/'
+ regexp = "/" exitall
+ end
+end
+
string delim "\"" "\"" escape "\\"
string delim "'" "'" escape "\\"
string delim "<" ">"
string = '[[:word:]]*/[^\n]*/[[:word:]]*'
-keyword = "chomp|chop|chr|crypt|hex|index|lc|lcfirst|length|oct|ord|pack|q|qq|reverse|rindex|sprintf|substr|tr|uc|ucfirst|m|s|g|qw|abs|atan2|cos|exp|hex|int|log|oct|rand|sin|sqrt|srand|my|local|our|delete|each|exists|keys|values|pack|read|syscall|sysread|syswrite|unpack|vec|undef|unless|return|length|grep|sort|caller|continue|dump|eval|exit|goto|last|next|redo|sub|wantarray|pop|push|shift|splice|unshift|split|switch|join|defined|foreach|last|chop|chomp|bless|dbmclose|dbmopen|ref|tie|tied|untie|while|next|map|eq|die|cmp|lc|uc|and|do|if|else|elsif|for|use|require|package|import|chdir|chmod|chown|chroot|fcntl|glob|ioctl|link|lstat|mkdir|open|opendir|readlink|rename|rmdir|stat|symlink|umask|unlink|utime|binmode|close|closedir|dbmclose|dbmopen|die|eof|fileno|flock|format|getc|print|printf|read|readdir|rewinddir|seek|seekdir|select|syscall|sysread|sysseek|syswrite|tell|telldir|truncate|warn|write|alarm|exec|fork|getpgrp|getppid|getpriority|kill|pipe|qx|setpgrp|setpriority|sleep|system|times|wait|waitpid"
+keyword = "chomp|chop|chr|crypt|hex|i|index|lc|lcfirst|length|oct|ord|pack|q|qq|reverse|rindex|sprintf|substr|tr|uc|ucfirst|m|s|g|qw|abs|atan2|cos|exp|hex|int|log|oct|rand|sin|sqrt|srand|my|local|our|delete|each|exists|keys|values|pack|read|syscall|sysread|syswrite|unpack|vec|undef|unless|return|length|grep|sort|caller|continue|dump|eval|exit|goto|last|next|redo|sub|wantarray|pop|push|shift|splice|unshift|split|switch|join|defined|foreach|last|chop|chomp|bless|dbmclose|dbmopen|ref|tie|tied|untie|while|next|map|eq|die|cmp|lc|uc|and|do|if|else|elsif|for|use|require|package|import|chdir|chmod|chown|chroot|fcntl|glob|ioctl|link|lstat|mkdir|open|opendir|readlink|rename|rmdir|stat|symlink|umask|unlink|utime|binmode|close|closedir|dbmclose|dbmopen|die|eof|fileno|flock|format|getc|print|printf|read|readdir|rewinddir|seek|seekdir|select|syscall|sysread|sysseek|syswrite|tell|telldir|truncate|warn|write|alarm|exec|fork|getpgrp|getppid|getpriority|kill|pipe|qx|setpgrp|setpriority|sleep|system|times|x|wait|waitpid"
comment delim '^\=(?:head1|head2|item)' '\=cut' multiline
-type = '(?:\$[#]?|@|%)[[:word:]]+'
+variable = '(?:\$[#]?|@|%)[/[:word:]]+'
include "symbols.lang"
diff --git a/src/postscript.lang b/src/postscript.lang
index b0842c6..0ffeaf3 100644
--- a/src/postscript.lang
+++ b/src/postscript.lang
@@ -31,6 +31,10 @@ keyword = "abs|add|aload|anchorsearch|and|arc|arcn|arct|arcto|array|ashow|astore
variable = $ID
+environment string delim "(" ")" multiline nested begin
+ specialchar = '\\.'
+end
+
comment start "%"
include "number.lang"
diff --git a/src/regexpengine.cpp b/src/regexpengine.cpp
deleted file mode 100644
index fda8409..0000000
--- a/src/regexpengine.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//
-// C++ Implementation: regexpengine
-//
-// Description:
-//
-//
-// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 2004
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "regexpengine.h"
-
-
-RegExpEngine::~RegExpEngine()
-{
-}
-
-#include <fstream>
-#include <iostream>
-#include <stdlib.h>
-
-#include "maingeneratormap.h"
-#include "keys.h"
-#include "langdefloader.h"
-#include "messages.h"
-#include "textformatter.h"
-#include "parserinfo.h"
-
-// purpose:
-// takes the contents of a file and transform to
-// syntax highlighted code in html format
-
-using namespace std;
-
-typedef enum { FOUND_EOF=0, FOUND_NL, FOUND_END } load_line_ret;
-
-load_line_ret
-load_line(std::string& s, std::istream& is)
-{
- s.erase();
- if (is.bad() || is.eof())
- return FOUND_EOF;
-
- char c;
- while (is.get(c))
- {
- if (c == '\n')
- return FOUND_NL;
- if (c != '\r')
- s.append(1, c);
- }
-
- return FOUND_END;
-}
-
-void
-RegExpEngine::process_file(const char *file)
-{
- istream *is = 0;
-
- if (file)
- {
- is = new ifstream(file);
- if (!is || ! (*is))
- {
- cerr << "Error in opening " << file
- << " for input" << endl ;
- exit(1) ;
- }
- }
- else
- is = &cin;
-
- std::string s;
-
- std::string::const_iterator start, end;
- boost::match_results<std::string::const_iterator> what;
- boost::match_flag_type flags;
-
- initial_state = currentstate;
-
- fileinfo->line = 1;
-
- load_line_ret ret;
- while ((ret = load_line(s, *is)) != FOUND_EOF)
- {
- bool matched = true;
- bool found_eol = false;
- start = s.begin();
- end = s.end();
- flags = boost::match_default;
-
- while (matched) {
- if (boost::regex_search(start, end, what, currentstate->reg_exp, flags))
- {
- string prefix = what.prefix();
- if (prefix.size())
- format(-1, prefix);
-
- for (unsigned int i = 1; i < what.size(); ++i) {
- if (what[i].matched) {
- format(i, string(what[i].first, what[i].second));
- if (currentstate->formatters[i]->getNextState()) {
- enterState(i);
- } else if (currentstate->formatters[i]->exit_state_level) {
- if (currentstate->formatters[i]->exit_all) {
- exitAll();
- } else {
- exitState(currentstate->formatters[i]->exit_state_level);
- }
- }
- start = what[i].second;
- if (!(*start)) {
- if (found_eol)
- matched = false; // we had already matched end of line
- found_eol = true;
- }
- break; // no other match is possible
- }
- }
-
- if (what[0].first != what[0].second) // matched more than 0
- flags |= boost::match_not_bol;
- }
- else
- {
- format(-1, string(start, end));
- matched = false;
- }
- }
-
- if (ret == FOUND_NL)
- formatter->format_nl("\n");
-
- (fileinfo->line)++;
- }
-
- formatter->flush();
-
- if (file)
- delete is;
-
- currentstate = initial_state; // reset the initial state
-}
-
-void
-RegExpEngine::enterState(int index)
-{
- states_stack.push(currentstate);
- currentstate = currentstate->formatters[index]->getNextState();
-}
-
-void
-RegExpEngine::exitState(int level)
-{
- // remove additional levels
- for (int l = 1; l < level; ++l)
- states_stack.pop();
-
- currentstate = states_stack.top();
- states_stack.pop();
-}
-
-void
-RegExpEngine::exitAll()
-{
- currentstate = initial_state;
- states_stack = stack_of_states();
-}
-
-void
-RegExpEngine::format(int index, const std::string &s)
-{
- formatter->format(currentstate->get_elem(index), s, fileinfo);
-}
-
diff --git a/src/regexpstate.cpp b/src/regexpstate.cpp
deleted file mode 100644
index 997db08..0000000
--- a/src/regexpstate.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-//
-// C++ Implementation: %{MODULE}
-//
-// Description:
-//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#include "regexpstate.h"
-
-#include "keys.h"
-#include "messages.h"
-
-#include <stdlib.h>
-
-using namespace std;
-
-int RegExpState::global_id = 1;
-
-RegExpFormatter::RegExpFormatter(const string &el, RegExpStatePtr r, int exit, bool all) :
- elem(el), exit_state_level(exit), exit_all(all), next_state(r)
-{
-}
-
-void RegExpFormatter::setNextState(RegExpStatePtr r)
-{
- next_state_strong = r;
-}
-
-RegExpStatePtr RegExpFormatter::getNextState() const
-{
- RegExpStatePtr next = next_state.lock();
- if (!next)
- return next_state_strong;
-
- return next;
-}
-
-/**
- * Return the formatter associated to the passed index.
- * If the index is negative, it returns the default formatter.
- * @param index
- * @return
- */
-const string &
-RegExpState::get_elem(int index)
-{
- return formatters[(index<0 ? 0 : index)]->elem;
-}
-
-void
-RegExpState::add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f)
-{
- const string &ex = buffer.str();
- if (ex.size())
- buffer << "|";
-
- buffer << s;
-
- formatters.push_back(f);
- subExpressions.push_back(make_pair(s, *parserInfo));
-}
-
-void
-RegExpState::freeze()
-{
- const string &buffered = buffer.str();
- try {
- reg_exp.assign(buffered);
- } catch (boost::bad_expression &e) {
- printError("bad expression: " + buffered);
- throw;
- }
-}
-
-void
-RegExpState::set_default_formatter(RegExpFormatterPtr f)
-{
- formatters[0] = f;
-}
diff --git a/src/regexpstate.h b/src/regexpstate.h
deleted file mode 100644
index f38ea51..0000000
--- a/src/regexpstate.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//
-// C++ Interface: %{MODULE}
-//
-// Description:
-//
-//
-// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR}
-//
-// Copyright: See COPYING file that comes with this distribution
-//
-//
-#ifndef REGEXPSTATE_H
-#define REGEXPSTATE_H
-
-#include <boost/regex.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/weak_ptr.hpp>
-#include <deque>
-#include <vector>
-#include "my_sstream.h"
-#include "parserinfo.h"
-
-struct RegExpState;
-
-typedef boost::shared_ptr<RegExpState> RegExpStatePtr;
-typedef boost::weak_ptr<RegExpState> RegExpStatePtrW;
-
-struct RegExpFormatter
-{
- const std::string elem; // the element represented
- int exit_state_level; // how many states we must leave
- bool exit_all;
-
- RegExpFormatter(const std::string &el, RegExpStatePtr r = RegExpStatePtr(), int exit = 0, bool all = false);
-
- void setNextState(RegExpStatePtr r);
- RegExpStatePtr getNextState() const;
-
- private:
- RegExpStatePtrW next_state;
- RegExpStatePtr next_state_strong;
- /*
- FIXME
- the next_state is a weak pointer when there's a "nested" situation.
- This allows to avoid cycles, that otherwise would prevent memory from
- being correctly freed.
- */
-};
-
-typedef boost::shared_ptr<RegExpFormatter> RegExpFormatterPtr;
-typedef std::deque<RegExpFormatterPtr> format_vector;
-typedef std::pair<std::string, ParserInfo> SubExpressionInfo;
-typedef std::vector<SubExpressionInfo> SubExpressions;
-
-/**
-class representing a state for the regular expression engine
-
-@author Lorenzo Bettini
- */
-struct RegExpState
-{
- static int global_id;
- const int id; // the identifier of the state
- boost::regex reg_exp;
- SubExpressions subExpressions;
- format_vector formatters;
- std::ostringstream buffer;
-
- RegExpState() : id(global_id++), formatters(1) {}
-
- const std::string &get_elem(int index = -1);
- void add_exp(const std::string &s, ParserInfo *parserInfo, RegExpFormatterPtr f);
- void freeze();
- void set_default_formatter(RegExpFormatterPtr f);
- RegExpFormatterPtr getLastFormatter() const { return formatters[formatters.size()-1];}
-};
-
-#endif
diff --git a/src/ruby.lang b/src/ruby.lang
index 711fb7e..dcf64b7 100644
--- a/src/ruby.lang
+++ b/src/ruby.lang
@@ -7,7 +7,8 @@ include "number.lang"
string delim "\"" "\"" escape "\\"
string delim "'" "'" escape "\\"
string delim "<" ">"
-string = '[[:word:]]*/[^\n]*/[[:word:]]*'
+regexp = '/[^\n]*/'
+(symbol,regexp) = `(%r)(\{(?:\\\}|#\{[[:alnum:]]+\}|[^}])*\})`
keyword = "alias|begin|BEGIN|break|case|defined|do|else|elsif|end|END|ensure|for|if|in|include|loop|next|raise|redo|rescue|retry|return|super|then|undef|unless|until|when|while|yield|false|nil|self|true|__FILE__|__LINE__|and|not|or|def|class|module|catch|fail|load|throw"
@@ -15,9 +16,17 @@ comment delim '(^\=begin)' '^(\=end)' multiline
type = '(\$[#]?|@@|@)([[:word:]]+|\'|\"|/)'
+# don't highlight ? and ! as symbols if they are part of a method call
+normal = '[[:alnum:]]+(\?|!)'
+
include "symbols.lang"
+# for variable interpolation, #{ is not a comment
+(symbol,cbracket) = `(#)(\{)`
+
cbracket = "{|}"
-include "function.lang"
+# no function highlighting for Ruby, since a method invocation
+# can be written even without parenthesis
+# include "function.lang"
diff --git a/src/startapp.cc b/src/startapp.cc
index a025801..5409637 100644
--- a/src/startapp.cc
+++ b/src/startapp.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2005 Lorenzo Bettini, www.lorenzobettini.it
+ * Copyright (C) 1999-2007 Lorenzo Bettini, www.lorenzobettini.it
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,11 +41,10 @@
#include "parsestyles.h"
#include "generatorfactory.h"
-#include "textformatter.h"
#include "srcuntabifier.h"
#include "chartranslator.h"
#include "langdefloader.h"
-#include "lineoutputgenerator.h"
+#include "outputgenerator.h"
#include "langmap.h"
#include "regexpengine.h"
#include "regexpenginedebug.h"
@@ -54,6 +53,7 @@
#include "outlangdefparserfun.h"
#include "fileinfo.h"
#include "stopwatch.h"
+#include "textformatter.h"
#include "languageinfer.h"
@@ -72,10 +72,9 @@ ostream* sout;
#include "envmapper.h"
#endif // BUILD_AS_CGI
-
unsigned int line_num_digit = 0; // num of digits to represent line number
-gengetopt_args_info args_info ; // command line structure
+gengetopt_args_info args_info; // command line structure
static void print_cgi_header();
static void run_ctags(const string &cmd);
@@ -84,668 +83,621 @@ static void run_ctags(const string &cmd);
* Print progress status information (provided --quiet is not specified)
* @param message
*/
-static void progressInfo(const string &message)
-{
- if (args_info.quiet_given)
- return;
+static void progressInfo(const string &message) {
+ if (args_info.quiet_given)
+ return;
- cerr << message;
+ cerr << message;
}
StartApp::StartApp() :
- docgenerator(0), formatter(0), preformatter(0),
- langmap(new LangMap), outlangmap(new LangMap), generator_factory(0),
- entire_doc (0), verbose (0), cssUrl (0),
- use_css (0), is_cgi (0), gen_version(true),
- generate_line_num(false), generate_ref(false)
-{
+ docgenerator(0), preformatter(0), langmap(new LangMap),
+ outlangmap(new LangMap), generator_factory(0), entire_doc(0),
+ verbose(0), cssUrl(0), use_css(0), is_cgi(0), gen_version(true),
+ generate_line_num(false), generate_ref(false) {
}
-StartApp::~StartApp()
-{
- // cout << "destroying StartApp..." << endl;
- cmdline_parser_free(&args_info);
-
- if (formatter)
- delete formatter;
+StartApp::~StartApp() {
+ // cout << "destroying StartApp..." << endl;
+ cmdline_parser_free(&args_info);
- if (preformatter)
- delete preformatter;
+ if (preformatter)
+ delete preformatter;
- if (docgenerator)
- delete docgenerator;
+ if (docgenerator)
+ delete docgenerator;
- if (generator_factory)
- delete generator_factory;
+ if (generator_factory)
+ delete generator_factory;
}
-int
-StartApp::start(int argc, char * argv[])
-{
- char *docTitle;
- char *docHeader; // the buffer with the header
- char *docFooter; // the buffer with the footer
- const char *header_fileName = 0;
- const char *footer_fileName = 0;
- unsigned i;
- int v;
- int tabSpaces = 0;
+int StartApp::start(int argc, char * argv[]) {
+ char *docTitle;
+ char *docHeader; // the buffer with the header
+ char *docFooter; // the buffer with the footer
+ const char *header_fileName = 0;
+ const char *footer_fileName = 0;
+ unsigned i;
+ int v;
+ int tabSpaces = 0;
#ifdef BUILD_AS_CGI
- // map environment to parameters if used as CGI
- char **temp_argv;
- temp_argv = map_environment(&argc, argv);
- is_cgi = temp_argv != argv;
- argv = temp_argv;
+ // map environment to parameters if used as CGI
+ char **temp_argv;
+ temp_argv = map_environment(&argc, argv);
+ is_cgi = temp_argv != argv;
+ argv = temp_argv;
#endif // BUILD_AS_CGI
+ if ((v = cmdline_parser(argc, argv, &args_info)) != 0)
+ // calls cmdline parser. The user gave bad args if it doesn't return 0
+ return EXIT_FAILURE;
+
+ if (args_info.version_given) {
+ print_version();
+ print_copyright();
+ return EXIT_SUCCESS;
+ }
- if((v = cmdline_parser(argc, argv, &args_info)) != 0)
- // calls cmdline parser. The user gived bag args if it doesn't return -1
- return EXIT_FAILURE;
+ if (args_info.help_given) {
+ cout << "GNU ";
+ cmdline_parser_print_help();
+ print_reportbugs();
+ return EXIT_SUCCESS;
+ }
- if (args_info.version_given)
- {
- print_version ();
- print_copyright ();
- return EXIT_SUCCESS;
+ gen_version = (args_info.gen_version_flag != 0);
+
+ /* initialization of global symbols */
+ inputFileName = outputFileName = 0;
+ sout = 0;
+ docTitle = 0;
+ docHeader = 0;
+ docFooter = 0;
+
+ docTitle = args_info.title_arg;
+ header_fileName = args_info.header_arg;
+ footer_fileName = args_info.footer_arg;
+ verbose = args_info.verbose_given;
+ const string style_file = args_info.style_file_arg;
+
+ if (args_info.tab_given > 0)
+ tabSpaces = args_info.tab_arg;
+
+ if (header_fileName)
+ docHeader = read_file(header_fileName);
+
+ if (footer_fileName)
+ docFooter = read_file(footer_fileName);
+
+ cssUrl = args_info.css_arg;
+ use_css = ( cssUrl != 0 );
+
+ entire_doc =(! args_info.no_doc_given) &&( args_info.doc_given || (docTitle != 0) || use_css );
+
+ string inputFileName;
+ if (args_info.input_given)
+ inputFileName = args_info.input_arg;
+
+ string outputFileName;
+ if (inputFileName.size()&& ! is_cgi && args_info.output_given)
+ outputFileName = args_info.output_arg;
+
+ bool generate_to_stdout =(args_info.output_arg &&
+ strcmp (args_info.output_arg, "STDOUT") == 0);
+
+ if (verbose)
+ setMessager(new DefaultMessages);
+
+ printMessage( PACKAGE);
+ printMessage( VERSION);
+ printMessage(argv[0]);
+
+ if (verbose) {
+ printMessage("command line arguments: ");
+ for (int i = 0; i < argc; ++i) {
+ printMessage(argv[i]);
+ }
}
- if (args_info.help_given)
- {
- cout << "GNU ";
- cmdline_parser_print_help ();
- print_reportbugs ();
- return EXIT_SUCCESS;
+ /*
+ the starting default path to search for files is computed at
+ run-time: it is
+ the path of the binary + ".." + RELATIVEDATADIR
+ this should make the package relocatable (i.e., not stuck
+ with a fixed installation directory).
+ Of course, the GNU standards for installation directories
+ should be followed, but this is not a problem if you use
+ configure and make install features.
+ If no path is specified in the running program we go back to
+ the absolute datadir.
+ */
+ // this is defined in fileutil.cc
+ string prefix_dir = get_file_path(argv[0]);
+ if (prefix_dir.size())
+ start_path = get_file_path(argv[0])+ RELATIVEDATADIR;
+ else
+ start_path = ABSOLUTEDATADIR;
+
+ if (args_info.data_dir_given)
+ data_dir = args_info.data_dir_arg;
+
+ if (args_info.show_regex_given) {
+ if (LangDefLoader::show_regex(data_dir, args_info.show_regex_arg)) {
+ return (EXIT_SUCCESS);
+ }
+
+ return (EXIT_FAILURE);
}
- gen_version = (args_info.gen_version_flag != 0);
+ if (args_info.check_lang_given) {
+ cout << "checking " << args_info.check_lang_arg << "... ";
+ if (LangDefLoader::check_lang_def(data_dir, args_info.check_lang_arg)) {
+ cout << "OK" << endl;
+ return (EXIT_SUCCESS);
+ }
- /* initialization of global symbols */
- inputFileName = outputFileName = 0 ;
- sout = 0 ;
- docTitle = 0 ;
- docHeader = 0 ;
- docFooter = 0 ;
+ return (EXIT_FAILURE);
+ }
+
+ if (args_info.check_outlang_given) {
+ cout << "checking " << args_info.check_outlang_arg << "... ";
+ textstyles = parse_outlang_def(data_dir.c_str(),
+ args_info.check_outlang_arg);
+ cout << "OK" << endl;
+ return (EXIT_SUCCESS);
+ }
+
+ if (args_info.show_lang_elements_given) {
+ // we simply print all the language elements defined in the
+ // language definition file
+ if (LangDefLoader::show_lang_elements(data_dir,
+ args_info.show_lang_elements_arg))
+ return EXIT_SUCCESS;
+
+ return EXIT_FAILURE;
+ }
+
+ string lang_map = args_info.lang_map_arg;
+ assert(lang_map.size());
+ if (! args_info.lang_def_given)
+ langmap = LangMapPtr(new LangMap(data_dir, lang_map));
- docTitle = args_info.title_arg ;
- header_fileName = args_info.header_arg ;
- footer_fileName = args_info.footer_arg ;
- verbose = args_info.verbose_given ;
- const string style_file = args_info.style_file_arg;
+ string outlang_map = args_info.outlang_map_arg;
+ assert(outlang_map.size());
+ if (! args_info.outlang_def_given)
+ outlangmap = LangMapPtr(new LangMap(data_dir, outlang_map));
- if ( args_info.tab_given > 0 )
- tabSpaces = args_info.tab_arg ;
+ if (args_info.lang_list_given) {
+ cout << "Supported languages (file extensions)\nand associated language definition files\n\n";
+ langmap->print();
+ return (EXIT_SUCCESS);
+ }
+
+ if (args_info.outlang_list_given) {
+ cout << "Supported output languages\nand associated language definition files\n\n";
+ outlangmap->print();
+ return (EXIT_SUCCESS);
+ }
- if (header_fileName)
- docHeader = read_file (header_fileName);
+ outputbuffer = new OutputBuffer;
+ // when debugging, always flush the output
+ outputbuffer->setAlwaysFlush(args_info.debug_langdef_given);
- if (footer_fileName)
- docFooter = read_file (footer_fileName);
+ string title;
+ string doc_header;
+ string doc_footer;
+ string css_url;
- cssUrl = args_info.css_arg ;
- use_css = ( cssUrl != 0 ) ;
+ if (docTitle)
+ title = docTitle;
+ if ((! docTitle) && inputFileName.size())
+ title = inputFileName;
+ if (docHeader)
+ doc_header = docHeader;
+ if (docFooter)
+ doc_footer = docFooter;
+ if (cssUrl)
+ css_url = cssUrl;
- entire_doc =
- (! args_info.no_doc_given) &&
- ( args_info.doc_given || (docTitle != 0) || use_css ) ;
+ if (args_info.line_number_ref_given)
+ args_info.line_number_given = args_info.line_number_ref_given;
- string inputFileName;
- if (args_info.input_given)
- inputFileName = args_info.input_arg ;
+ string outlangfile;
- string outputFileName;
- if ( inputFileName.size() && ! is_cgi && args_info.output_given)
- outputFileName = args_info.output_arg ;
+ if (! args_info.outlang_def_given) {
+ string out_format = args_info.out_format_arg;
- bool generate_to_stdout =
- (args_info.output_arg &&
- strcmp (args_info.output_arg, "STDOUT") == 0);
+ if (use_css)
+ out_format += "-css";
- if ( verbose )
- setMessager( new DefaultMessages ) ;
+ if (entire_doc)
+ out_format += "-doc";
- printMessage( PACKAGE ) ;
- printMessage( VERSION ) ;
- printMessage( argv[0] ) ;
+ outlangfile = outlangmap->get_file(out_format);
- if (verbose) {
- printMessage("command line arguments: ");
- for (int i = 0; i < argc; ++i) {
- printMessage(argv[i]);
+ if (! outlangfile.size()) {
+ cerr << PACKAGE << ": ";
+ cerr << "output language " << out_format<< " not handled" << endl;
+ return EXIT_FAILURE;
+ }
+ } else {
+ outlangfile = args_info.outlang_def_arg;
}
- }
-
- /*
- the starting default path to search for files is computed at
- run-time: it is
- the path of the binary + ".." + RELATIVEDATADIR
- this should make the package relocable (i.e., not stuck
- with a fixed installation directory).
- Of course, the GNU standards for installation directories
- should be followed, but this is not a problem if you use
- configure and make install features.
- If no path is specified in the running program we go back to
- the absolute datadir.
- */
- // this is defined in fileutil.cc
- string prefix_dir = get_file_path(argv[0]);
- if (prefix_dir.size())
- start_path = get_file_path(argv[0]) + RELATIVEDATADIR;
- else
- start_path = ABSOLUTEDATADIR;
-
- if (args_info.data_dir_given)
- data_dir = args_info.data_dir_arg;
-
- if (args_info.show_regex_given) {
- if (LangDefLoader::show_regex(data_dir, args_info.show_regex_arg)) {
- return(EXIT_SUCCESS);
+
+ textstyles = parse_outlang_def(data_dir.c_str(), outlangfile.c_str());
+
+ if (! textstyles->file_extension.size() && ! outputFileName.size()) {
+ cerr << PACKAGE << ": ";
+ cerr << "empty file extension in output language file " <<outlangfile << endl;
+ return EXIT_FAILURE;
+ }
+
+ const string ext = "." + textstyles->file_extension;
+
+ RefPosition refposition;
+ if (strcmp(args_info.gen_references_arg, "inline")==0)
+ refposition = INLINE;
+ else if (strcmp(args_info.gen_references_arg, "postline")==0)
+ refposition = POSTLINE;
+ else if (strcmp(args_info.gen_references_arg, "postdoc")==0)
+ refposition = POSTDOC;
+ else {
+ cerr << PACKAGE << ": ";
+ cerr << "Bug: unhandled reference position " <<args_info.gen_references_arg << endl;
+ return EXIT_FAILURE;
}
- return (EXIT_FAILURE);
- }
+ if (args_info.gen_references_given && strlen(args_info.ctags_arg)> 0) {
+ string ctags_cmd = args_info.ctags_arg;
+
+ if (inputFileName.size()) {
+ ctags_cmd += " ";
+ ctags_cmd += inputFileName;
+ } else if (args_info.inputs_num) {
+ for (i = 0; i < (args_info.inputs_num); ++i) {
+ ctags_cmd += " ";
+ ctags_cmd += args_info.inputs[i];
+ }
+ }
- if (args_info.check_lang_given) {
- cout << "checking " << args_info.check_lang_arg << "... ";
- if (LangDefLoader::check_lang_def(data_dir, args_info.check_lang_arg)) {
- cout << "OK" << endl;
- return(EXIT_SUCCESS);
+ run_ctags(ctags_cmd);
}
- return (EXIT_FAILURE);
- }
-
- if (args_info.check_outlang_given) {
- cout << "checking " << args_info.check_outlang_arg << "... ";
- textstyles = parse_outlang_def(data_dir.c_str(), args_info.check_outlang_arg);
- cout << "OK" << endl;
- return (EXIT_SUCCESS);
- }
-
- if (args_info.show_lang_elements_given) {
- // we simply printe all the language elements defined in the
- // language definition file
- if (LangDefLoader::show_lang_elements(data_dir, args_info.show_lang_elements_arg))
- return EXIT_SUCCESS;
-
- return EXIT_FAILURE;
- }
-
- string lang_map = args_info.lang_map_arg;
- assert(lang_map.size());
- if (! args_info.lang_def_given)
- langmap = LangMapPtr(new LangMap(data_dir, lang_map));
-
- string outlang_map = args_info.outlang_map_arg;
- assert(outlang_map.size());
- if (! args_info.outlang_def_given)
- outlangmap = LangMapPtr(new LangMap(data_dir, outlang_map));
-
- if (args_info.lang_list_given) {
- cout << "Supported languages (file extensions)\nand associated language definition files\n\n";
- langmap->print();
- return (EXIT_SUCCESS);
- }
-
- if (args_info.outlang_list_given) {
- cout << "Supported output languages\nand associated language definition files\n\n";
- outlangmap->print();
- return (EXIT_SUCCESS);
- }
-
- outputbuffer = new OutputBuffer;
- // when debugging, always flush the output
- outputbuffer->setAlwaysFlush( args_info.debug_langdef_given );
-
- string title;
- string doc_header;
- string doc_footer;
- string css_url;
-
- if (docTitle)
- title = docTitle;
- if ((! docTitle) && inputFileName.size())
- title = inputFileName;
- if (docHeader)
- doc_header = docHeader;
- if (docFooter)
- doc_footer = docFooter;
- if (cssUrl)
- css_url = cssUrl;
-
- if (args_info.line_number_ref_given)
- args_info.line_number_given = args_info.line_number_ref_given;
-
- string outlangfile;
-
- if (! args_info.outlang_def_given) {
- string out_format = args_info.out_format_arg;
-
- if (use_css)
- out_format += "-css";
-
- if (entire_doc)
- out_format += "-doc";
-
- outlangfile = outlangmap->get_file(out_format);
-
- if (! outlangfile.size()) {
- cerr << PACKAGE << ": ";
- cerr << "output language " << out_format
- << " not handled" << endl;
- return EXIT_FAILURE ;
+ if (tabSpaces)
+ preformatter = new Untabifier (tabSpaces);
+ else if (args_info.line_number_given)
+ preformatter = new Untabifier(8);
+ else
+ preformatter = new PreFormatter();
+
+ PreFormatterPtr chartranslator(textstyles->charTranslator);
+ preformatter->setFormatter(chartranslator);
+
+ string background_color;
+
+ generator_factory =new GeneratorFactory(textstyles, preformatter,
+ args_info.gen_references_given,
+ args_info.ctags_file_arg,
+ refposition, args_info.debug_langdef_given);
+
+ if (args_info.style_css_file_given) {
+ parseCssStyles(data_dir, args_info.style_css_file_arg,
+ generator_factory, background_color);
+ } else {
+ parseStyles(data_dir, style_file, generator_factory, background_color);
}
- } else {
- outlangfile = args_info.outlang_def_arg;
- }
-
- textstyles = parse_outlang_def(data_dir.c_str(), outlangfile.c_str());
-
- if (! textstyles->file_extension.size() && ! outputFileName.size()) {
- cerr << PACKAGE << ": ";
- cerr << "empty file extension in output language file " <<
- outlangfile << endl;
- return EXIT_FAILURE ;
- }
-
- const string ext = "." + textstyles->file_extension;
-
- RefPosition refposition;
- if (strcmp(args_info.gen_references_arg, "inline")==0)
- refposition = INLINE;
- else if (strcmp(args_info.gen_references_arg, "postline")==0)
- refposition = POSTLINE;
- else if (strcmp(args_info.gen_references_arg, "postdoc")==0)
- refposition = POSTDOC;
- else {
- cerr << PACKAGE << ": ";
- cerr << "Bug: unhandled reference position " <<
- args_info.gen_references_arg << endl;
- return EXIT_FAILURE ;
- }
-
- if (args_info.gen_references_given && strlen(args_info.ctags_arg) > 0) {
- string ctags_cmd = args_info.ctags_arg;
- if (inputFileName.size()) {
- ctags_cmd += " ";
- ctags_cmd += inputFileName;
- } else if (args_info.inputs_num) {
- for ( i = 0 ; i < (args_info.inputs_num) ; ++i ) {
- ctags_cmd += " ";
- ctags_cmd += args_info.inputs[i];
- }
+ generator_factory->addDefaultGenerator();
+
+ if (background_color != "")
+ background_color = generator_factory->preprocessColor(background_color);
+
+ docgenerator = new DocGenerator(title, inputFileName,
+ doc_header, doc_footer,
+ css_url, background_color, entire_doc,
+ textstyles->docTemplate.toStringBegin(),
+ textstyles->docTemplate.toStringEnd());;
+
+ if (is_cgi)
+ print_cgi_header();
+
+ // let's start the translation :-)
+
+ generate_line_num =(args_info.line_number_given || args_info.line_number_ref_given);
+ generate_ref = args_info.line_number_ref_given;
+
+ if (args_info.lang_def_arg)
+ lang_file = args_info.lang_def_arg;
+
+ int result= EXIT_SUCCESS;
+
+ if (args_info.src_lang_given)
+ source_language = args_info.src_lang_arg;
+
+ // if a stopwatch is created, when it is deleted (automatically
+ // since we're using a shared pointer, it will print the
+ // elapsed seconds.
+ boost::shared_ptr<StopWatch> stopwatch;
+ if (args_info.statistics_given)
+ stopwatch = boost::shared_ptr<StopWatch>(new StopWatch);
+
+ // first the --input file
+ if (! args_info.inputs_num) {
+ result = processFile(inputFileName, (generate_to_stdout ? "" : outputFileName), ext);
}
- run_ctags(ctags_cmd);
- }
-
- formatter = new TextFormatter;
-
- if (tabSpaces)
- preformatter = new Untabifier (tabSpaces);
- else if (args_info.line_number_given)
- preformatter = new Untabifier(8);
- else
- preformatter = new PreFormatter();
-
- PreFormatterPtr chartranslator(textstyles->charTranslator);
- preformatter->setFormatter(chartranslator);
-
- string background_color;
-
- generator_factory =
- new GeneratorFactory(textstyles, preformatter,
- args_info.gen_references_given,
- args_info.ctags_file_arg,
- refposition, args_info.debug_langdef_given);
-
- if (args_info.style_css_file_given) {
- parseCssStyles(data_dir, args_info.style_css_file_arg, generator_factory, background_color);
- } else {
- parseStyles(data_dir, style_file, generator_factory, background_color);
- }
-
- generator_factory->addDefaultGenerator();
-
- if (background_color != "")
- background_color = generator_factory->preprocessColor( background_color );
-
- docgenerator = new DocGenerator(title, inputFileName,
- doc_header, doc_footer,
- css_url, background_color, entire_doc,
- textstyles->docTemplate.toStringBegin(),
- textstyles->docTemplate.toStringEnd());;
-
- if ( is_cgi )
- print_cgi_header() ;
-
- // let's start the translation :-)
-
- generate_line_num =
- (args_info.line_number_given || args_info.line_number_ref_given);
- generate_ref = args_info.line_number_ref_given;
-
- if (args_info.lang_def_arg)
- lang_file = args_info.lang_def_arg;
-
- int result = EXIT_SUCCESS;
-
- if (args_info.src_lang_given)
- source_language = args_info.src_lang_arg;
-
- // if a stopwatch is created, when it is deleted (automatically
- // since we're using a shared pointer, it will print the
- // elapsed seconds.
- boost::shared_ptr<StopWatch> stopwatch;
- if (args_info.statistics_given)
- stopwatch = boost::shared_ptr<StopWatch>(new StopWatch);
-
- // first the --input file
- if ( ! args_info.inputs_num ) {
- result = processFile(inputFileName, (generate_to_stdout ? "" : outputFileName), ext) ;
- }
-
- // let's process other files, if there are any
- if ( args_info.inputs_num && !is_cgi ) {
- for ( i = 0 ; i < (args_info.inputs_num) ; ++i ) {
- progressInfo(string("Processing ") + args_info.inputs[i] + " ... ");
- const string &outputFileName = createOutputFileName (args_info.inputs[i],
- args_info.output_dir_arg, ext);
- result = processFile
- ( args_info.inputs[i],
- (generate_to_stdout ? "" : outputFileName),
- ext) ;
- if (result == EXIT_FAILURE)
- break;
- progressInfo("created " + outputFileName + "\n");
+ // let's process other files, if there are any
+ if (args_info.inputs_num && !is_cgi) {
+ for (i = 0; i < (args_info.inputs_num); ++i) {
+ progressInfo(string("Processing ")+ args_info.inputs[i] + " ... ");
+ const string &outputFileName = createOutputFileName(
+ args_info.inputs[i], args_info.output_dir_arg, ext);
+ result = processFile(args_info.inputs[i], (generate_to_stdout ? "" : outputFileName), ext);
+ if (result == EXIT_FAILURE)
+ break;
+ progressInfo("created " + outputFileName + "\n");
+ }
}
- }
- delete outputbuffer;
- outputbuffer = 0;
+ delete outputbuffer;
+ outputbuffer = 0;
- return (result);
+ return (result);
}
-void
-StartApp::print_copyright()
-{
- int i;
+void StartApp::print_copyright() {
+ int i;
- for (i = 1; i <= copyright_text_length; ++i)
- cout << copyright_text[i] << endl;;
+ for (i = 1; i <= copyright_text_length; ++i)
+ cout << copyright_text[i] << endl;
+ ;
}
-void
-StartApp::print_reportbugs()
-{
- int i;
+void StartApp::print_reportbugs() {
+ int i;
- for (i = 1; i <= reportbugs_text_length; ++i)
- cout << reportbugs_text[i] << endl;
+ for (i = 1; i <= reportbugs_text_length; ++i)
+ cout << reportbugs_text[i] << endl;
}
-void
-StartApp::print_version()
-{
- cout << "GNU " << PACKAGE << " " << VERSION << endl;
+void StartApp::print_version() {
+ cout << "GNU " << PACKAGE << " " << VERSION << endl;
}
int process_file(const char *file, TextFormatter *pre, const string &path,
- const string &lang_file, FileInfo *fileinfo, bool verbose)
-{
- RegExpStatePtr initial_state = LangDefLoader::get_lang_def(path, lang_file);
-
- try{
- printMessage("Processing " + string((file ? file : "standard input")) + " with regex");
- printMessage("Using language definition " + lang_file);
- RegExpEnginePtr engine;
- if (args_info.debug_langdef_given) {
- RegExpEngineDebug *debugEngine = new RegExpEngineDebug(initial_state, pre, fileinfo);
- debugEngine->setInteractive( strcmp(args_info.debug_langdef_arg, "interactive" ) == 0);
- engine = RegExpEnginePtr(debugEngine);
- } else {
- engine = RegExpEnginePtr(new RegExpEngine(initial_state, pre, fileinfo));
+ const string &lang_file, FileInfo *fileinfo, bool verbose) {
+ RegExpStatePtr initial_state = LangDefLoader::get_lang_def(path, lang_file);
+
+ try {
+ printMessage("Processing " + string((file ? file : "standard input")) + " with regex");
+ printMessage("Using language definition " + lang_file);
+ RegExpEnginePtr engine;
+ if (args_info.debug_langdef_given) {
+ RegExpEngineDebug *debugEngine = new RegExpEngineDebug(initial_state, pre, fileinfo);
+ debugEngine->setInteractive( strcmp(args_info.debug_langdef_arg, "interactive" ) == 0);
+ engine = RegExpEnginePtr(debugEngine);
+ } else {
+ engine = RegExpEnginePtr(new RegExpEngine(initial_state, pre, fileinfo));
+ }
+ engine->process_file(file);
}
- engine->process_file(file);
- }
- catch(...)
- {
- exitError("error during regex processing");
- }
- return 0;
+ catch(...)
+ {
+ exitError("error during regex processing");
+ }
+ return 0;
}
-string StartApp::inferLang(const string &inputFileName)
-{
- printMessage("inferring input language...", cerr);
- if (!inputFileName.size()) {
- cerr << PACKAGE << ": ";
- cerr << "missing feature: language inference requires input file" << endl;
- return "";
- }
+string StartApp::inferLang(const string &inputFileName) {
+ printMessage("inferring input language...", cerr);
+ if (!inputFileName.size()) {
+ cerr << PACKAGE << ": ";
+ cerr << "missing feature: language inference requires input file" << endl;
+ return "";
+ }
- LanguageInfer languageInfer;
+ LanguageInfer languageInfer;
- const string &result = languageInfer.infer(inputFileName);
- if (result.size()) {
- printMessage( "inferred input language: " + result, cerr ) ;
+ const string &result = languageInfer.infer(inputFileName);
+ if (result.size()) {
+ printMessage("inferred input language: " + result, cerr);
- // OK now map it into a .lang file
- string mapped_lang = langmap->get_file(result);
+ // OK now map it into a .lang file
+ string mapped_lang = langmap->get_file(result);
- if (!mapped_lang.size()) {
- // try the lower version
- mapped_lang = langmap->get_file(Utils::tolower(result));
- }
+ if (!mapped_lang.size()) {
+ // try the lower version
+ mapped_lang = langmap->get_file(Utils::tolower(result));
+ }
- if (mapped_lang.size()) {
- return mapped_lang;
+ if (mapped_lang.size()) {
+ return mapped_lang;
+ }
+ } else {
+ printMessage("couldn't infer input language", cerr);
}
- } else {
- printMessage( "couldn't infer input language", cerr ) ;
- }
- return "";
+ return "";
}
-int
-StartApp::processFile(const string &inputFileName, const string &outputFileName, const string &file_extension)
-{
- FILE *in = 0;
- bool deleteOStream = false ;
- bool langSpecFound = false;
-
- if ( outputFileName.size() ) {
- sout = new ofstream(outputFileName.c_str()) ;
- if ( ! (*sout) ) {
- cerr << "Error in creating " << outputFileName << " for output" << endl ;
- return EXIT_FAILURE ;
+int StartApp::processFile(const string &inputFileName,
+ const string &outputFileName, const string &file_extension) {
+ FILE *in = 0;
+ bool deleteOStream = false;
+ bool langSpecFound = false;
+
+ if (outputFileName.size()) {
+ sout = new ofstream(outputFileName.c_str());
+ if (! (*sout)) {
+ cerr << "Error in creating " << outputFileName << " for output" << endl;
+ return EXIT_FAILURE;
+ }
+ deleteOStream = true;
+ printMessage("output file: " + inputFileName);
}
- deleteOStream = true;
- printMessage("output file: " + inputFileName);
- }
- if (inputFileName.size())
- {
- unsigned int lines = get_line_count (inputFileName);
- printMessage("input file: " + inputFileName);
-
- line_num_digit = 0;
- while (lines)
- {
- ++line_num_digit;
- lines /= 10;
+ if (inputFileName.size()) {
+ unsigned int lines = get_line_count(inputFileName);
+ printMessage("input file: " + inputFileName);
+
+ line_num_digit = 0;
+ while (lines) {
+ ++line_num_digit;
+ lines /= 10;
}
+ } else
+ line_num_digit = 5;
+ // if we read from stdin, we can't read the file in advance and
+ // check how many lines of code it contains. In this case set
+ // the number of digit for the line number to 5.
+
+ /*
+ * Use default values for any options not provided
+ */
+ if (sout == 0) {
+ sout = &cout;
}
- else
- line_num_digit = 5;
- // if we read from stdin, we can't read the file in advance and
- // check how many lines of code it contains. In this case set
- // the number of digit for the line number to 5.
-
- /*
- * Use default values for any options not provided
- */
- if (sout == 0) {
- sout = &cout;
- }
-
- if (in == 0) {
- ; /* Well stdin already points to stdin so, .... */
- }
-
- OutputGenerator *outputgenerator = 0;
-
- if (generate_line_num)
- outputgenerator =
- new LineOutputGenerator(*sout,
- &(textstyles->refstyle.anchor), generate_ref,
- (args_info.line_number_ref_given ? args_info.line_number_ref_arg : ""),
- textstyles->line_prefix);
- else
- outputgenerator = new OutputGenerator(*sout, textstyles->line_prefix);
-
- // when debugging, always flush the output
- outputgenerator->setAlwaysFlush( args_info.debug_langdef_given );
-
- outputbuffer->setOutputGenerator(outputgenerator);
-
- docgenerator->set_gen_version (gen_version);
-
- printMessage( "translating source code... ", cerr ) ;
-
- string langfile = lang_file;
-
- if (args_info.infer_lang_given) {
- langfile = inferLang(inputFileName);
-
- if (langfile.size())
- langSpecFound = true;
- }
-
- // language inference has the precedence (if it succeeds)
- if (!langfile.size() && !langSpecFound) {
- // find the language definition file associated to a language
- if (source_language.size()) {
- langfile = langmap->get_file(source_language);
- if (! langfile.size())
- {
- if (! args_info.failsafe_given)
- {
- cerr << PACKAGE << ": ";
- cerr << "source language " << source_language
- << " not handled" << endl;
- return EXIT_FAILURE ;
- }
+
+ if (in == 0) {
+ ; /* Well stdin already points to stdin so, .... */
+ }
+
+ OutputGenerator *outputgenerator = 0;
+
+ if (generate_line_num)
+ outputgenerator =new OutputGenerator(*sout, generator_factory->getTextFormatter()->getGenerator("linenum"),
+ &(textstyles->refstyle.anchor), generate_ref,
+ (args_info.line_number_ref_given ? args_info.line_number_ref_arg : ""),
+ textstyles->line_prefix);
+ else
+ outputgenerator = new OutputGenerator(*sout, textstyles->line_prefix);
+
+ // when debugging, always flush the output
+ outputgenerator->setAlwaysFlush(args_info.debug_langdef_given);
+
+ outputbuffer->setOutputGenerator(outputgenerator);
+
+ docgenerator->set_gen_version(gen_version);
+
+ printMessage("translating source code... ", cerr);
+
+ string langfile = lang_file;
+
+ if (args_info.infer_lang_given) {
+ langfile = inferLang(inputFileName);
+
+ if (langfile.size())
+ langSpecFound = true;
+ }
+
+ // language inference has the precedence (if it succeeds)
+ if (!langfile.size() && !langSpecFound) {
+ // find the language definition file associated to a language
+ if (source_language.size()) {
+ langfile = langmap->get_file(source_language);
+ if (! langfile.size()) {
+ if (! args_info.failsafe_given) {
+ cerr << PACKAGE << ": ";
+ cerr << "source language " << source_language<< " not handled" << endl;
+ return EXIT_FAILURE;
+ }
+ } else
+ langSpecFound = true;
+ } else {
+ if (! inputFileName.size()) {
+ if (! args_info.failsafe_given) {
+ cerr << PACKAGE << ": ";
+ cerr << "when using stdin, please specify a source language"<< endl;
+ return EXIT_FAILURE;
+ }
+ }
+
+ string file_ext = get_file_extension(inputFileName);
+
+ if (file_ext != "")
+ langfile = langmap->get_file(file_ext);
+
+ if (langfile.size())
+ langSpecFound = true;
}
- else
+ } else
langSpecFound = true;
- } else {
- if (! inputFileName.size())
- {
- if (! args_info.failsafe_given)
- {
- cerr << PACKAGE << ": ";
- cerr << "when using stdin, please specify a source language"
- << endl;
- return EXIT_FAILURE ;
- }
- }
- string file_ext = get_file_extension (inputFileName);
+ // language inference is always performed, if the other attempts failed
+ // if --infer-lang was specified at command line, then the inference
+ // has already been performed, otherwise we perform it now
+ if (!langSpecFound && !args_info.infer_lang_given) {
+ langfile = inferLang(inputFileName);
- if (file_ext != "")
- langfile = langmap->get_file(file_ext);
+ if (langfile.size())
+ langSpecFound = true;
+ }
- if (langfile.size())
+ if (!langSpecFound && args_info.failsafe_given) {
+ // OK we use default.lang
+ langfile = "default.lang";
langSpecFound = true;
}
- }
- else
- langSpecFound = true;
-
- // language inference is always performed, if the other attempts failed
- // if --infer-lang was specified at command line, then the inference
- // has already been performed, otherwise we perform it now
- if (!langSpecFound && !args_info.infer_lang_given) {
- langfile = inferLang(inputFileName);
-
- if (langfile.size())
- langSpecFound = true;
- }
-
- if (!langSpecFound && args_info.failsafe_given) {
- // OK we use default.lang
- langfile = "default.lang";
- langSpecFound = true;
- }
-
- if (langSpecFound)
- {
- docgenerator->generate_start_doc ();
-
- const string &i_file_name = get_input_file_name(inputFileName);
- const char *input_file_name = (i_file_name.size() ? i_file_name.c_str() : 0);
-
- FileInfo fileinfo(i_file_name, outputFileName);
- process_file(input_file_name, formatter, data_dir, langfile,
- &fileinfo, verbose);
-
- outputbuffer->flush();
-
- docgenerator->generate_end_doc ();
-
- printMessage( "done !", cerr ) ;
- } else {
- cerr << PACKAGE << ": ";
- cerr << "unknown input language for "
- << (inputFileName.size() ? inputFileName : "(stdin)") << endl;
- return EXIT_FAILURE ;
- }
-
- /*
- else // we're in failsafe mode so we simply copy the file to the output
- {
- istream *input;
- if(! inputFileName.size())
- input = &cin;
- else
- input = open_file_istream_or_error(inputFileName);
- *sout << input->rdbuf();
+ if (langSpecFound) {
+ docgenerator->generate_start_doc();
+
+ const string &i_file_name = get_input_file_name(inputFileName);
+ const char *input_file_name = (i_file_name.size() ? i_file_name.c_str() : 0);
- if (input != &cin)
- delete input;
- }
- */
+ FileInfo fileinfo(i_file_name, outputFileName);
+ process_file(input_file_name, generator_factory->getTextFormatter(),
+ data_dir, langfile, &fileinfo, verbose);
- sout->flush ();
+ outputbuffer->flush();
- if ( deleteOStream )
- delete sout ;
+ docgenerator->generate_end_doc();
+
+ printMessage("done !", cerr);
+ } else {
+ cerr << PACKAGE << ": ";
+ cerr << "unknown input language for "<< (inputFileName.size() ? inputFileName : "(stdin)") << endl;
+ return EXIT_FAILURE;
+ }
- delete outputgenerator;
+ /*
+ else // we're in failsafe mode so we simply copy the file to the output
+ {
+ istream *input;
+ if(! inputFileName.size())
+ input = &cin;
+ else
+ input = open_file_istream_or_error(inputFileName);
- return EXIT_SUCCESS;
+ *sout << input->rdbuf();
+
+ if (input != &cin)
+ delete input;
+ }
+ */
+
+ sout->flush();
+
+ if (deleteOStream)
+ delete sout;
+
+ delete outputgenerator;
+
+ return EXIT_SUCCESS;
}
-void run_ctags(const string &cmd)
-{
- printMessage("Running ctags: " + cmd);
+void run_ctags(const string &cmd) {
+ printMessage("Running ctags: " + cmd);
- int res = system(cmd.c_str());
+ int res = system(cmd.c_str());
- if (res != 0) {
- exitError("error running ctags");
- }
+ if (res != 0) {
+ exitError("error running ctags");
+ }
}
-void
-print_cgi_header()
-{
- printf( "Content-type: text/html\n" ) ;
- printf( "\n" ) ;
+void print_cgi_header() {
+ printf("Content-type: text/html\n");
+ printf("\n");
}
diff --git a/src/startapp.h b/src/startapp.h
index f98327b..8c83240 100644
--- a/src/startapp.h
+++ b/src/startapp.h
@@ -23,7 +23,6 @@
#include "langmap.h"
#include "textstyles.h"
-class TextFormatter;
class PreFormatter;
class DocGenerator;
class GeneratorFactory;
@@ -33,7 +32,6 @@ class StartApp
protected:
char *inputFileName, *outputFileName; /* what we're reading */
DocGenerator *docgenerator;
- TextFormatter *formatter;
PreFormatter *preformatter;
LangMapPtr langmap;
LangMapPtr outlangmap;
diff --git a/src/texinfo.style b/src/texinfo.style
index 3adcf37..9d15cd7 100644
--- a/src/texinfo.style
+++ b/src/texinfo.style
@@ -1,6 +1,7 @@
keyword, type b ;
variable f, i ;
string f ;
+regexp f ;
comment nf, i, noref ;
preproc b ;
diff --git a/src/xhtml_notfixed.outlang b/src/xhtml_notfixed.outlang
new file mode 100644
index 0000000..d5a8eb0
--- /dev/null
+++ b/src/xhtml_notfixed.outlang
@@ -0,0 +1,16 @@
+include "xhtml_common.outlang"
+
+fixed "<span style=\"font-family: monospace;\">$text</span>"
+
+doctemplate
+"<!-- Generator: $additional -->
+$header"
+"$footer"
+end
+
+translations
+"\n" "<br />\n"
+" " "&nbsp; "
+'^ ' "&nbsp;" # a space at the beginning of a line
+"\t" "&nbsp; &nbsp; &nbsp; &nbsp; "
+end
diff --git a/src/xhtmltable.outlang b/src/xhtmltable.outlang
index 217b09d..ec73fc1 100644
--- a/src/xhtmltable.outlang
+++ b/src/xhtmltable.outlang
@@ -1,14 +1,7 @@
include "xhtml_common.outlang"
doctemplate
-"<table style=\"background-color: $docbgcolor\">
-<tbody>
-<tr><td>
-<pre><tt>"
-"</tt></pre>
-</td></tr>
-</tbody>
-</table>
-"
+"<table style=\"background-color: $docbgcolor\"><tbody><tr><td><pre><tt>"
+"</tt></pre></td></tr></tbody></table>"
end
generated by cgit v1.2.3 (git 2.39.1) at 2025-09-16 07:37:56 +0000

Page converted by AltStyle (-> original) /