src-highlite.git - src-highlite

index : src-highlite.git

src-highlite

summary refs log tree commit diff

path: root/src/lib/regexpengine.cpp

diff options

Diffstat (limited to 'src/lib/regexpengine.cpp')

-rw-r--r--

src/lib/regexpengine.cpp

265

1 files changed, 265 insertions, 0 deletions

diff --git a/src/lib/regexpengine.cpp b/src/lib/regexpengine.cpp
new file mode 100644
index 0000000..37d377d
--- /dev/null
+++ b/src/lib/regexpengine.cpp

@@ -0,0 +1,265 @@

+//

+// C++ Implementation: regexpengine

+//

+// Description:

+//

+// Copyright: See COPYING file that comes with this distribution

+//

+#include "regexpengine.h"

+RegExpEngine::~RegExpEngine() {

+#include <fstream>

+#include <iostream>

+#include <stdlib.h>

+#include "textformatter.h"

+#include "keys.h"

+#include "langdefloader.h"

+#include "messages.h"

+#include "parserinfo.h"

+// purpose:

+// takes the contents of a file and transform to

+// syntax highlighted code in html format

+using namespace std;

+typedef enum {FOUND_EOF=0, FOUND_NL, FOUND_END} load_line_ret;

+load_line_ret load_line(std::string& s, std::istream& is) {

+ s.erase();

+ if (is.bad()|| is.eof())

+ return FOUND_EOF;

+ char c;

+ while (is.get(c)) {

+ if (c == '\n')

+ return FOUND_NL;

+ if (c != '\r')

+ s.append(1, c);

+ }

+ return FOUND_END;

+void RegExpEngine::process_file(const char *file) {

+ istream *is = 0;

+ if (file) {

+ is = new ifstream(file);

+ if (!is || ! (*is)) {

+ cerr << "Error in opening " << file<< " for input" << endl;

+ exit(1);

+ }

+ } else

+ is = &cin;

+ std::string s;

+ std::string::const_iterator start, end;

+ boost::smatch match;

+ boost::smatch what;

+ boost::match_flag_type flags;

+ // the regexp state we try at the moment.

+ RegExpStatePtr alternative;

+ // the regexp state matching best;

+ RegExpStatePtr best_alternative;

+ initial_state = currentstate;

+ fileinfo->line = 1;

+ // for selecting the formatter

+ int index_of_formatter = 0;

+ // for selecting the subexpression (or the whole expression)

+ int index_of_subexpression = 0;

+ int smallest_prefix = -1;

+ int biggest_length = -1;

+ string prefix;

+ load_line_ret ret;

+ while ((ret = load_line(s, *is)) != FOUND_EOF) {

+ bool matched = true;

+ bool found_eol = false;

+ start = s.begin();

+ end = s.end();

+ // reset the flags

+ flags = boost::match_default;

+ // always start with the current state

+ alternative = currentstate;

+ while (matched) {

+ matched = false;

+ if (alternative->has_alternative()) {

+ // this means that the state contains a list of alternative states

+ // so we must try to match all the states and use the one that matches best

+ // i.e., with the smallest prefix and the biggest match length

+ smallest_prefix = -1;

+ biggest_length = -1;

+ while (alternative.get()) {

+ if (boost::regex_search(start, end, match,

+ alternative->reg_exp, flags)) {

+ const std::string &match_prefix = match.prefix();

+ if (smallest_prefix < 0 || (boost::smatch::size_type)smallest_prefix >= match_prefix.size()) {

+ if ((boost::smatch::size_type)smallest_prefix > match_prefix.size()|| biggest_length < 0 || (boost::smatch::difference_type)biggest_length < match.length()) {

+ prefix = match.prefix();

+ smallest_prefix = match_prefix.size();

+ biggest_length = match.length();

+ best_alternative = alternative;

+ matched = true;

+ // copy it, otherwise the next call will overwrite it

+ what = match;

+ }

+ alternative = alternative->alternative;

+ }

+ if (matched) {

+ // store the one that matched best

+ alternative = best_alternative;

+ } else {

+ // reset the original current_state

+ alternative = currentstate;

+ }

+ } else if (boost::regex_search(start, end, what,

+ alternative->reg_exp, flags)) {

+ // otherwise, all the alternatives of the state are stored as

+ // a big alternation, where all alternatives are grouped

+ prefix = what.prefix();

+ matched = true;

+ }

+ if (matched) {

+ if (alternative->hasMarkedAlternatives) {

+ // we must inspect all the sub_matches

+ // to find the subexpression that matched

+ for (unsigned int i = 1; i < what.size(); ++i) {

+ if (what[i].matched) {

+ index_of_formatter = i;

+ index_of_subexpression = i;

+ break; // no other match is possible

+ }

+ } else {

+ // the alternative state contains only one expression, with

+ // marked subexpressions, so we must format the whole match

+ // the formatter at 0 is the normal formatter

+ index_of_formatter = 1;

+ // we select the whole match

+ index_of_subexpression = 0;

+ // this is OK also in the case when allAlternativesCanMatch:

+ // we consider the whole expression, and all formatters share the

+ // same exit state, so we can use the first one

+ }

+ // part that possibly did not match

+ if (prefix.size())

+ format(-1, alternative, prefix);

+ if (alternative->allAlternativesCanMatch) {

+ // we must format each subexpression that matched

+ for (unsigned int i = 1; i < what.size(); ++i) {

+ if (what[i].matched) {

+ format(i, alternative, what[i]);

+ }

+ // only the last formatter has the correct next state

+ index_of_formatter = what.size() - 1;

+ } else {

+ // format the part that matched

+ format(index_of_formatter, alternative,

+ what[index_of_subexpression]);

+ }

+ if (alternative->formatters[index_of_formatter]->getNextState()) {

+ // we must enter another state

+ enterState(alternative, index_of_formatter);

+ } else if (alternative->formatters[index_of_formatter]->exit_state_level) {

+ if (alternative->formatters[index_of_formatter]->exit_all) {

+ // we must go back to the outermost state

+ exitAll();

+ } else {

+ // we must get back to exit_state_level states

+ exitState(alternative->formatters[index_of_formatter]->exit_state_level);

+ }

+ // now let's continue with what's left of the original input

+ start = what[index_of_subexpression].second;

+ if (!(*start)) {

+ if (found_eol)

+ matched = false; // we had already matched end of line

+ // we might need to match the eol itself, so let's perform another loop

+ found_eol = true;

+ }

+ if (what[0].first != what[0].second) {

+ // we actually consumed something, so the start of the string

+ // must not be interpreted as the beginning of the line

+ flags |= boost::match_not_bol;

+ }

+ // we always search for the next match by using the original current state

+ alternative = currentstate;

+ } else {

+ // format the non-matching part as normal

+ format(-1, alternative, string(start, end));

+ matched = false;

+ }

+ if (ret == FOUND_NL)

+ formatter->format_nl("\n");

+ (fileinfo->line)++;

+ }

+ // make sure we flush all the buffered parts

+ formatter->flush();

+ if (file)

+ delete is;

+ currentstate = initial_state; // reset the initial state

+void RegExpEngine::enterState(RegExpStatePtr state, int index) {

+ states_stack.push(currentstate);

+ currentstate = state->formatters[index]->getNextState();

+void RegExpEngine::exitState(int level) {

+ // remove additional levels

+ for (int l = 1; l < level; ++l)

+ states_stack.pop();

+ currentstate = states_stack.top();

+ states_stack.pop();

+void RegExpEngine::exitAll() {

+ currentstate = initial_state;

+ states_stack = stack_of_states();

+void RegExpEngine::format(int index, RegExpStatePtr state, const std::string &s) {

+ formatter->format(state->get_elem(index), s, fileinfo);

generated by cgit v1.2.3 (git 2.25.1) at 2025年09月17日 12:42:59 +0000