src-highlite.git - src-highlite

index : src-highlite.git
src-highlite
summary refs log tree commit diff
path: root/src/lib/regexpengine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/regexpengine.cpp')
-rw-r--r--src/lib/regexpengine.cpp 265
1 files changed, 265 insertions, 0 deletions
diff --git a/src/lib/regexpengine.cpp b/src/lib/regexpengine.cpp
new file mode 100644
index 0000000..37d377d
--- /dev/null
+++ b/src/lib/regexpengine.cpp
@@ -0,0 +1,265 @@
+//
+// C++ Implementation: regexpengine
+//
+// Description:
+//
+//
+// Author: Lorenzo Bettini <http://www.lorenzobettini.it>, (C) 1999-2007
+//
+// Copyright: See COPYING file that comes with this distribution
+//
+//
+#include "regexpengine.h"
+
+RegExpEngine::~RegExpEngine() {
+}
+
+#include <fstream>
+#include <iostream>
+#include <stdlib.h>
+
+#include "textformatter.h"
+#include "keys.h"
+#include "langdefloader.h"
+#include "messages.h"
+#include "parserinfo.h"
+
+// purpose:
+// takes the contents of a file and transform to
+// syntax highlighted code in html format
+
+using namespace std;
+
+typedef enum {FOUND_EOF=0, FOUND_NL, FOUND_END} load_line_ret;
+
+load_line_ret load_line(std::string& s, std::istream& is) {
+ s.erase();
+ if (is.bad()|| is.eof())
+ return FOUND_EOF;
+
+ char c;
+ while (is.get(c)) {
+ if (c == '\n')
+ return FOUND_NL;
+ if (c != '\r')
+ s.append(1, c);
+ }
+
+ return FOUND_END;
+}
+
+void RegExpEngine::process_file(const char *file) {
+ istream *is = 0;
+
+ if (file) {
+ is = new ifstream(file);
+ if (!is || ! (*is)) {
+ cerr << "Error in opening " << file<< " for input" << endl;
+ exit(1);
+ }
+ } else
+ is = &cin;
+
+ std::string s;
+
+ std::string::const_iterator start, end;
+ boost::smatch match;
+ boost::smatch what;
+ boost::match_flag_type flags;
+
+ // the regexp state we try at the moment.
+ RegExpStatePtr alternative;
+
+ // the regexp state matching best;
+ RegExpStatePtr best_alternative;
+
+ initial_state = currentstate;
+
+ fileinfo->line = 1;
+
+ // for selecting the formatter
+ int index_of_formatter = 0;
+ // for selecting the subexpression (or the whole expression)
+ int index_of_subexpression = 0;
+
+ int smallest_prefix = -1;
+ int biggest_length = -1;
+
+ string prefix;
+
+ load_line_ret ret;
+ while ((ret = load_line(s, *is)) != FOUND_EOF) {
+ bool matched = true;
+ bool found_eol = false;
+ start = s.begin();
+ end = s.end();
+ // reset the flags
+ flags = boost::match_default;
+
+ // always start with the current state
+ alternative = currentstate;
+
+ while (matched) {
+ matched = false;
+
+ if (alternative->has_alternative()) {
+ // this means that the state contains a list of alternative states
+ // so we must try to match all the states and use the one that matches best
+ // i.e., with the smallest prefix and the biggest match length
+ smallest_prefix = -1;
+ biggest_length = -1;
+ while (alternative.get()) {
+ if (boost::regex_search(start, end, match,
+ alternative->reg_exp, flags)) {
+ const std::string &match_prefix = match.prefix();
+ if (smallest_prefix < 0 || (boost::smatch::size_type)smallest_prefix >= match_prefix.size()) {
+ if ((boost::smatch::size_type)smallest_prefix > match_prefix.size()|| biggest_length < 0 || (boost::smatch::difference_type)biggest_length < match.length()) {
+ prefix = match.prefix();
+ smallest_prefix = match_prefix.size();
+ biggest_length = match.length();
+ best_alternative = alternative;
+ matched = true;
+ // copy it, otherwise the next call will overwrite it
+ what = match;
+ }
+ }
+ }
+ alternative = alternative->alternative;
+ }
+
+ if (matched) {
+ // store the one that matched best
+ alternative = best_alternative;
+ } else {
+ // reset the original current_state
+ alternative = currentstate;
+ }
+ } else if (boost::regex_search(start, end, what,
+ alternative->reg_exp, flags)) {
+ // otherwise, all the alternatives of the state are stored as
+ // a big alternation, where all alternatives are grouped
+ prefix = what.prefix();
+ matched = true;
+ }
+
+ if (matched) {
+ if (alternative->hasMarkedAlternatives) {
+ // we must inspect all the sub_matches
+ // to find the subexpression that matched
+ for (unsigned int i = 1; i < what.size(); ++i) {
+ if (what[i].matched) {
+ index_of_formatter = i;
+ index_of_subexpression = i;
+ break; // no other match is possible
+ }
+ }
+ } else {
+ // the alternative state contains only one expression, with
+ // marked subexpressions, so we must format the whole match
+
+ // the formatter at 0 is the normal formatter
+ index_of_formatter = 1;
+ // we select the whole match
+ index_of_subexpression = 0;
+
+ // this is OK also in the case when allAlternativesCanMatch:
+ // we consider the whole expression, and all formatters share the
+ // same exit state, so we can use the first one
+ }
+
+ // part that possibly did not match
+ if (prefix.size())
+ format(-1, alternative, prefix);
+
+ if (alternative->allAlternativesCanMatch) {
+ // we must format each subexpression that matched
+ for (unsigned int i = 1; i < what.size(); ++i) {
+ if (what[i].matched) {
+ format(i, alternative, what[i]);
+ }
+ }
+
+ // only the last formatter has the correct next state
+ index_of_formatter = what.size() - 1;
+ } else {
+ // format the part that matched
+ format(index_of_formatter, alternative,
+ what[index_of_subexpression]);
+ }
+
+ if (alternative->formatters[index_of_formatter]->getNextState()) {
+ // we must enter another state
+ enterState(alternative, index_of_formatter);
+ } else if (alternative->formatters[index_of_formatter]->exit_state_level) {
+ if (alternative->formatters[index_of_formatter]->exit_all) {
+ // we must go back to the outermost state
+ exitAll();
+ } else {
+ // we must get back to exit_state_level states
+ exitState(alternative->formatters[index_of_formatter]->exit_state_level);
+ }
+ }
+
+ // now let's continue with what's left of the original input
+ start = what[index_of_subexpression].second;
+ if (!(*start)) {
+ if (found_eol)
+ matched = false; // we had already matched end of line
+
+ // we might need to match the eol itself, so let's perform another loop
+ found_eol = true;
+ }
+
+ if (what[0].first != what[0].second) {
+ // we actually consumed something, so the start of the string
+ // must not be interpreted as the beginning of the line
+ flags |= boost::match_not_bol;
+ }
+
+ // we always search for the next match by using the original current state
+ alternative = currentstate;
+ } else {
+ // format the non-matching part as normal
+ format(-1, alternative, string(start, end));
+ matched = false;
+ }
+ }
+
+ if (ret == FOUND_NL)
+ formatter->format_nl("\n");
+
+ (fileinfo->line)++;
+ }
+
+ // make sure we flush all the buffered parts
+ formatter->flush();
+
+ if (file)
+ delete is;
+
+ currentstate = initial_state; // reset the initial state
+}
+
+void RegExpEngine::enterState(RegExpStatePtr state, int index) {
+ states_stack.push(currentstate);
+ currentstate = state->formatters[index]->getNextState();
+}
+
+void RegExpEngine::exitState(int level) {
+ // remove additional levels
+ for (int l = 1; l < level; ++l)
+ states_stack.pop();
+
+ currentstate = states_stack.top();
+ states_stack.pop();
+}
+
+void RegExpEngine::exitAll() {
+ currentstate = initial_state;
+ states_stack = stack_of_states();
+}
+
+void RegExpEngine::format(int index, RegExpStatePtr state, const std::string &s) {
+ formatter->format(state->get_elem(index), s, fileinfo);
+}
+
generated by cgit v1.2.3 (git 2.25.1) at 2025年09月17日 12:42:59 +0000

AltStyle によって変換されたページ (->オリジナル) /