10
\$\begingroup\$

Task:

This function searches given null terminated string pStr by given subset of regular expression pMatch.

Return value is matched string appeared in pStr.

If nothing has matched, return value is empty string.

The subset of regular expression grammar is defined as follows:

  • A string is set of 8-bit characters. Beware that it is not 7-bit.
  • The metacharacter \ is an escape character, that means any single letter appeared next used as is.
  • The characters in squared brackets metacharacters [ and ] is a sequence of literal characters or metacharacters. Immediately after ], another metacharacters in curly braces { num } must be followed. The num is a sequence of digits which specifies the number of times the previous literal character or metacharacter sequence in squared brackets must be repeated. e.g. "abc[def]{2}ghi" matches "abcdefdefghi".
  • The squared brackets metacharacters can be nested. e.g. "abc[def[ghi]{3}jkl]{2}mno" matches "abcdefghighighijkldefghighighijklmno".
  • In case of matching pattern given by pMatch is a grammatical error, this function returns a string "ERROR".

My Solution:

Define a state machine and put regular expression strings into stacks. When used then pop up, multiple by N times and push to stack again.

 escape <-- escape <---- escape <-----
 | | | | | |
 | | | | | |
 normal --> repeatStrStart --> repeatStrEnd --> repeatNumStart --
 ^ ^ | |
 | | | |
 | ------------ |
 ----------------------------------------------------

Code:

string PatternSearch(const unsigned char* pStr, const unsigned char* pMatch)
{
 enum status {
 normal,
 escape,
 repeatStrStart,
 repeatStrEnd,
 repeatNumStart
 };
 status current = normal;
 status restore = normal;
 const static string strErr = "ERROR";
 stringstream bufNormal;
 stringstream bufRepeat;
 stringstream bufTmp;
 stack<unsigned char> stackBracket;
 stack<string> stackRepeat;
 unsigned long repeat = 0;
 size_t check = 0;
 int i = 0;
 for(i=0; pMatch[i] != '0円'; i++) {
 if(current == normal) {
 if(pMatch[i] == '\\') {
 restore = current;
 current = escape;
 }
 else if(pMatch[i] == '[') {
 current = repeatStrStart;
 stackBracket.push(pMatch[i]);
 }
 else {
 bufNormal << pMatch[i];
 }
 }
 else if(current == escape) {
 if(restore == normal) {
 bufNormal << pMatch[i];
 current = restore;
 }
 else if(restore == repeatStrStart) {
 bufRepeat << pMatch[i];
 current = restore;
 }
 else if(restore == repeatNumStart) {
 bufRepeat << pMatch[i];
 current = restore;
 }
 else {
 bufNormal.str(strErr);
 bufNormal.clear();
 break;
 }
 }
 else if(current == repeatStrStart) {
 if(pMatch[i] == '\\') {
 restore = current;
 current = escape;
 }
 else if(pMatch[i] == '[') {
 stackBracket.push(pMatch[i]);
 stackRepeat.push(bufRepeat.str());
 bufRepeat.str("");
 bufRepeat.clear();
 }
 else if(pMatch[i] == ']') {
 if(stackBracket.empty() == false && stackBracket.top() == '[') {
 current = repeatStrEnd;
 if(stackBracket.size() > stackRepeat.size()) {
 stackRepeat.push(bufRepeat.str());
 }
 else {
 bufTmp.str("");
 bufTmp.clear();
 bufTmp << stackRepeat.top();
 bufTmp << bufRepeat.str();
 stackRepeat.pop();
 stackRepeat.push(bufTmp.str());
 bufTmp.str("");
 bufTmp.clear();
 }
 stackBracket.pop();
 bufRepeat.str("");
 bufRepeat.clear();
 }
 else {
 bufNormal.str(strErr);
 bufNormal.clear();
 break;
 }
 }
 else {
 bufRepeat << pMatch[i];
 }
 }
 else if(current == repeatStrEnd) {
 if(pMatch[i] == '{') {
 stackBracket.push(pMatch[i]);
 current = repeatNumStart;
 }
 else {
 bufNormal.str(strErr);
 bufNormal.clear();
 break;
 }
 }
 else {
 if(pMatch[i] == '\\') {
 restore = current;
 current = escape;
 }
 else if(pMatch[i] == '}') {
 if(stackBracket.empty() == false && stackBracket.top() == '{') {
 stackBracket.pop();
 try {
 repeat = stoul(/*str=*/bufRepeat.str(), /*idx=*/&check);
 if(check == bufRepeat.str().length()) {
 bufRepeat.str("");
 bufRepeat.clear();
 while(repeat--) {
 bufRepeat << stackRepeat.top();
 }
 stackRepeat.pop();
 if(stackRepeat.empty()) {
 bufNormal << bufRepeat.str();
 }
 else {
 bufTmp.str("");
 bufTmp.clear();
 bufTmp << stackRepeat.top();
 bufTmp << bufRepeat.str();
 stackRepeat.pop();
 stackRepeat.push(bufTmp.str());
 }
 bufTmp.str("");
 bufTmp.clear();
 bufRepeat.str("");
 bufRepeat.clear();
 }
 else {
 bufNormal.str(strErr);
 bufNormal.clear();
 break;
 }
 }
 catch(invalid_argument e) {
 bufNormal.str(strErr);
 bufNormal.clear();
 break;
 }
 catch(out_of_range e) {
 bufNormal.str(strErr);
 bufNormal.clear();
 break;
 }
 if(stackBracket.empty()) {
 current = normal;
 }
 else {
 current = repeatStrStart;
 }
 }
 else {
 bufRepeat.str(strErr);
 bufRepeat.clear();
 break;
 }
 }
 else {
 bufRepeat << pMatch[i];
 }
 }
 }
 if(stackBracket.empty() == false || stackRepeat.empty() == false) {
 bufNormal.str(strErr);
 bufNormal.clear();
 }
 else if(current != normal) {
 bufNormal.str(strErr);
 bufNormal.clear();
 }
 else {
 bufTmp.str("");
 bufTmp.clear();
 bufTmp << pStr;
 if(bufTmp.str().find(bufNormal.str()) == string::npos) {
 bufNormal.str("");
 bufNormal.clear();
 }
 }
 return bufNormal.str();
 } 
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Jul 13, 2016 at 3:30
\$\endgroup\$
3
  • \$\begingroup\$ You're reinventing-the-wheel on purpose, rather than using std::regex? \$\endgroup\$ Commented Jul 13, 2016 at 8:29
  • \$\begingroup\$ Why invent a non standard regular expression format. Use an existing one rather than a completely new one. Also wheel. \$\endgroup\$ Commented Jul 13, 2016 at 10:26
  • \$\begingroup\$ @LokiAstari No one wants to do these things on purpose. These are interview questions. \$\endgroup\$ Commented Sep 30, 2016 at 0:38

1 Answer 1

2
\$\begingroup\$

Speaking from a purely style point of view, nine levels of indentation is a substantial red flag, when you should be able to keep it at three.

So much indentation makes this code hard to read in the first place, which is not the sort of practice you want to get into, especially if your code will be read and evaluated by others.

answered Oct 2, 2016 at 20:40
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.