From f430c31aac0adbed7d2090ae9f1466820ea649df Mon Sep 17 00:00:00 2001 From: Jonathan McChesney Date: 2019年10月21日 20:57:12 +0100 Subject: [PATCH 1/4] Add KMP and relevant unit tests --- src/_Classics_/knuth-morris-pratt/index.js | 91 +++++++++++++++++++ .../knuth-morris-pratt.test.js | 39 ++++++++ 2 files changed, 130 insertions(+) create mode 100644 src/_Classics_/knuth-morris-pratt/index.js create mode 100644 src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js diff --git a/src/_Classics_/knuth-morris-pratt/index.js b/src/_Classics_/knuth-morris-pratt/index.js new file mode 100644 index 00000000..c3f5b643 --- /dev/null +++ b/src/_Classics_/knuth-morris-pratt/index.js @@ -0,0 +1,91 @@ + + // Longest prefix suffix - generate an array of the longest previous suffix for each pattern array value + const createLPS = (pattern, patternLength, lps) => { + // initialise the current longest prefix suffix length and iterator index values + lps[0] = 0; + let length = 0; + let i = 1; + // while there is still pattern to iterate over - calculate the lps for i = 1 to patternLength - 1 + while (i < patternLength) { + /* * + * if the pattern character at position i matches the pattern character at position length, then increment length, update + * the lps to the incremted length value and iterate to the next index i. + * */ + if (pattern.charAt(i) === pattern.charAt(length)) { + length++; + lps[i] = length; + i++; + } + // if a match is not found + else { + // if the length value is not 0, then set the length to be the lps value of index length - 1 + if (length !== 0) { + length = lps[length - 1]; + } + // else if length is 0, then set the lps at position i to length, i.e. 0 and increment i. 
+ else { + lps[i] = length; + i++; + } + } + } + return lps; + } + + /* * + * Invoke the Knuth-Morris-Pratt pattern matching algorithm to find a Pattern with a Text - this uses a precomputed prefix-suffix + * array/table to essentially skip chunks of the text that we know will match the pattern. + * This algorithm will return true if the pattern is a subset of the text, else it will return false. + * This algorithm accepts two strings, the pattern and text. + * */ + const KMPSearch = (pattern, text) => { + const patternLength = pattern.length; // Often referred to as M + const textLength = text.length; // Often referred to as N + + let lps = [patternLength]; // Longest Pattern Suffix - array containing the lps for all pattern value positions + lps = createLPS(pattern, patternLength, lps); // This is preprocessed - before the text is searched for the pattern. + + let patternIndex = 0; // Referred to as P + let textIndex = 0; // Referred to as T + let found = false; + + // While there is still text left to iterate over and the pattern has not yet been found + while (textIndex < textLength && found === false) { + // if the pattern character at pattern index P equals the text character at text index T, then increment the text and pattern indexes + if (pattern.charAt(patternIndex) === text.charAt(textIndex)) { + textIndex++; + patternIndex++; + } + /* * + * if the pattern index equals the pattern length then the pattern has been successfully found, as such the pattern is a subset of + * the text the pattern index is set to the longest pattern suffix value (the index is decremented due to being zero indexed). 
+ * */ + if (patternIndex === patternLength) { + // console.log(`Pattern found at index ${textIndex-patternIndex}`); + patternIndex = lps[patternIndex - 1]; + found = true; + } + /* * + * else if there is still text left to iterate over and the pattern character does not match the text character at their respective + * index positions, then check of the pattern Index is 0, i.e. if it is the first pattern position. If so then jump to the next text + * character, else (this is not the first pattern position), then update the pattern index using the generated longest pattern suffix, + * to skip ahead of matching values. This logic will only be encountered after T number of mismatches. + * */ + else if (textIndex < textLength && pattern.charAt(patternIndex) !== text.charAt(textIndex)) { + if (patternIndex === 0) + textIndex = textIndex + 1; + else + patternIndex = lps[patternIndex - 1]; + } + } + // Pattern has not been found, return false. Else return true. + if (!found) { + // console.log('The pattern was not found!') + return false + } + return true + }; + + module.exports = { + KMPSearch + }; diff --git a/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js b/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js new file mode 100644 index 00000000..770bda85 --- /dev/null +++ b/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js @@ -0,0 +1,39 @@ +const { KMPSearch } = require('.'); + +describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => { + describe('KMPSearch', () =>{ + it('Should return true when the pattern equals the text', () => { + expect(KMPSearch('A', 'A')).toEqual(true); + }); + it('Should return true when the pattern is a single character and is contained within the text', () => { + expect(KMPSearch('S', 'TEST')).toEqual(true); + }); + it('Should return true when the pattern is multiple characters and in the middle of the text', () => { + expect(KMPSearch('WORLD', 'TESTWORLDTEST')).toEqual(true); + }); + 
it('Should return true when the pattern is present multiple times within the text', () => { + expect(KMPSearch('ST', 'TESTWORLDTEST')).toEqual(true); + }); + it('Should return true when the pattern is a single character and is present at the start of the text', () => { + expect(KMPSearch('A', 'ABABABAABCABCABC')).toEqual(true); + }); + it('Should return true when the pattern is multiple characters and is present at the start of the text', () => { + expect(KMPSearch('AB', 'ABABABAABCABCABC')).toEqual(true); + }); + it('Should return true when the pattern contains repeating characters, and is present in the middle of the text', () => { + expect(KMPSearch('AAABAAAA', 'AAAAAAAAAAABAAAAAA')).toEqual(true); + }); + it('Should return true when the pattern is contained within the text and the pattern contains non alphabetic characters', () => { + expect(KMPSearch('AAA123! ', 'AAAAAA123! AAAAABAAAAAA')).toEqual(true); + }); + it('Should return false when the pattern does not equal the text', () => { + expect(KMPSearch('A', 'B')).toEqual(false); + }); + it('Should return false when the pattern is not contained within the text', () => { + expect(KMPSearch('AD', 'ABABABAABCABCABC')).toEqual(false); + }); + it('Should return false when the pattern is longer than the text', () => { + expect(KMPSearch('AAAAAAAA', 'AAAAAA')).toEqual(false); + }); + }); +}); \ No newline at end of file From f90ad4bae62cf044cf0cabe2ed1aa377231e83ba Mon Sep 17 00:00:00 2001 From: Jonathan McChesney Date: 2019年10月21日 21:09:21 +0100 Subject: [PATCH 2/4] add time complexity and preamble --- src/_Classics_/knuth-morris-pratt/index.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/_Classics_/knuth-morris-pratt/index.js b/src/_Classics_/knuth-morris-pratt/index.js index c3f5b643..388b2f1b 100644 --- a/src/_Classics_/knuth-morris-pratt/index.js +++ b/src/_Classics_/knuth-morris-pratt/index.js @@ -1,4 +1,11 @@ + /* * + * The time complexity of KMP algorithm is O(n) in the worst case + * 
Example use case: Pattern = AABCAB Text = AAABACABAABCABAABCA + * LPSArray = [ 0, 0, 1, 2, 3, 0 ] + * Found = true, at position 13 + * */ + // Longest prefix suffix - generate an array of the longest previous suffix for each pattern array value const createLPS = (pattern, patternLength, lps) => { // initialise the current longest prefix suffix length and iterator index values @@ -37,6 +44,7 @@ * array/table to essentially skip chunks of the text that we know will match the pattern. * This algorithm will return true if the pattern is a subset of the text, else it will return false. * This algorithm accepts two strings, the pattern and text. + * The time complexity of the KMP algorithm is O(n) in the worst case. * */ const KMPSearch = (pattern, text) => { const patternLength = pattern.length; // Often referred to as M @@ -44,6 +52,7 @@ let lps = [patternLength]; // Longest Pattern Suffix - array containing the lps for all pattern value positions lps = createLPS(pattern, patternLength, lps); // This is preprocessed - before the text is searched for the pattern. 
+ // console.log({ lpsArray: lps }) let patternIndex = 0; // Referred to as P let textIndex = 0; // Referred to as T From ae9dfd2ffb98b6a7e0c4bd0738cb0944dbf1d00d Mon Sep 17 00:00:00 2001 From: Jonathan McChesney Date: 2019年10月21日 21:34:43 +0100 Subject: [PATCH 3/4] update eslint rules for kmp --- src/_Classics_/knuth-morris-pratt/index.js | 184 +++++++++--------- .../knuth-morris-pratt.test.js | 4 +- 2 files changed, 94 insertions(+), 94 deletions(-) diff --git a/src/_Classics_/knuth-morris-pratt/index.js b/src/_Classics_/knuth-morris-pratt/index.js index 388b2f1b..5a9c16c1 100644 --- a/src/_Classics_/knuth-morris-pratt/index.js +++ b/src/_Classics_/knuth-morris-pratt/index.js @@ -1,100 +1,100 @@ +/* * +* The time complexity of KMP algorithm is O(n) in the worst case +* Example use case: Pattern = AABCAB Text = AAABACABAABCABAABCA +* LPSArray = [ 0, 0, 1, 2, 3, 0 ] +* Found = true, at position 13 +* */ + +// Longest prefix suffix - generate an array of the lps for each pattern array value +const createLPS = (pattern, patternLength) => { + // initialise the current longest prefix suffix length and iterator index values + const lps = [patternLength]; + lps[0] = 0; + + let length = 0; + let i = 1; + // while there is still pattern to iterate over - calculate the lps for i = 1 to patternLength - 1 + while (i < patternLength) { /* * - * The time complexity of KMP algorithm is O(n) in the worst case - * Example use case: Pattern = AABCAB Text = AAABACABAABCABAABCA - * LPSArray = [ 0, 0, 1, 2, 3, 0 ] - * Found = true, at position 13 - * */ - - // Longest prefix suffix - generate an array of the longest previous suffix for each pattern array value - const createLPS = (pattern, patternLength, lps) => { - // initialise the current longest prefix suffix length and iterator index values - lps[0] = 0; - let length = 0; - let i = 1; - // while there is still pattern to iterate over - calculate the lps for i = 1 to patternLength - 1 - while (i < patternLength) { - /* * - * if 
the pattern character at position i matches the pattern character at position length, then increment length, update - * the lps to the incremted length value and iterate to the next index i. - * */ - if (pattern.charAt(i) === pattern.charAt(length)) { - length++; - lps[i] = length; - i++; - } - // if a match is not found - else { - // if the length value is not 0, then set the length to be the lps value of index length - 1 - if (length !== 0) { - length = lps[length - 1]; - } - // else if length is 0, then set the lps at position i to length, i.e. 0 and increment i. - else { - lps[i] = length; - i++; - } - } - } - return lps; + * if the pattern character at position i matches the pattern character at position length, + * then increment length, update + * the lps to the incremented length value and iterate to the next index i. + * */ + if (pattern.charAt(i) === pattern.charAt(length)) { + length += 1; + lps[i] = length; + i += 1; + // if not matching + } else if (length !== 0) { + // if the length value is not 0, then set the length to be the lps value of index length - 1 + length = lps[length - 1]; + } else { + // else if length is 0, then set the lps at position i to length, i.e. 0 and increment i. + lps[i] = length; + i += 1; } + } + return lps; +}; - /* * - * Invoke the Knuth-Morris-Pratt pattern matching algorithm to find a Pattern with a Text - this uses a precomputed prefix-suffix - * array/table to essentially skip chunks of the text that we know will match the pattern. - * This algorithm will return true if the pattern is a subset of the text, else it will return false. - * This algorithm accepts two strings, the pattern and text. - * The time complexity of the KMP algorithm is O(n) in the worst case. 
- * */ - const KMPSearch = (pattern, text) => { - const patternLength = pattern.length; // Often referred to as M - const textLength = text.length; // Often referred to as N +/* * +* Invoke the Knuth-Morris-Pratt pattern matching algorithm to find a Pattern within a Text - this +* uses a precomputed prefix-suffix array/table to essentially skip chunks of the text that we +* know will match the pattern. This algorithm will return true if the pattern is a substring of +* the text, else it will return false. +* This algorithm accepts two strings, the pattern and text. +* The time complexity of the KMP algorithm is O(n + m) in the worst case. +* */ +const KMPSearch = (pattern, text) => { + const patternLength = pattern.length; // Often referred to as M + const textLength = text.length; // Often referred to as N - let lps = [patternLength]; // Longest Pattern Suffix - array containing the lps for all pattern value positions - lps = createLPS(pattern, patternLength, lps); // This is preprocessed - before the text is searched for the pattern. - // console.log({ lpsArray: lps }) + // Longest Prefix Suffix - array containing the lps for all pattern value positions + const lps = createLPS(pattern, patternLength); // This is preprocessed. 
+ // console.log({ lpsArray: lps }) - let patternIndex = 0; // Referred to as P - let textIndex = 0; // Referred to as T - let found = false; + let patternIndex = 0; // Referred to as P + let textIndex = 0; // Referred to as T + let found = false; - // While there is still text left to iterate over and the pattern has not yet been found - while (textIndex < textLength && found === false) { - // if the pattern character at pattern index P equals the text character at text index T, then increment the text and pattern indexes - if (pattern.charAt(patternIndex) === text.charAt(textIndex)) { - textIndex++; - patternIndex++; - } - /* * - * if the pattern index equals the pattern length then the pattern has been successfully found, as such the pattern is a subset of - * the text the pattern index is set to the longest pattern suffix value (the index is decremented due to being zero indexed). - * */ - if (patternIndex === patternLength) { - // console.log(`Pattern found at index ${textIndex-patternIndex}`); - patternIndex = lps[patternIndex - 1]; - found = true; - } - /* * - * else if there is still text left to iterate over and the pattern character does not match the text character at their respective - * index positions, then check of the pattern Index is 0, i.e. if it is the first pattern position. If so then jump to the next text - * character, else (this is not the first pattern position), then update the pattern index using the generated longest pattern suffix, - * to skip ahead of matching values. This logic will only be encountered after T number of mismatches. - * */ - else if (textIndex < textLength && pattern.charAt(patternIndex) !== text.charAt(textIndex)) { - if (patternIndex === 0) - textIndex = textIndex + 1; - else - patternIndex = lps[patternIndex - 1]; - } - } - // Pattern has not been found, return false. Else return true. 
- if (!found) { - // console.log('The pattern was not found!') - return false - } - return true - }; + // While there is still text left to iterate over and the pattern has not yet been found + while (textIndex < textLength && found === false) { + // if the pattern char at index pos P equals the text char at text pos T, then increment indexes + if (pattern.charAt(patternIndex) === text.charAt(textIndex)) { + textIndex += 1; + patternIndex += 1; + } + /* * + * if the pattern index equals the pattern length then the pattern has been successfully + * found, as such the pattern is a subset of the text the pattern index is set to the longest + * pattern suffix value (the index is decremented due to being zero indexed). + * */ + if (patternIndex === patternLength) { + // console.log(`Pattern found at index ${textIndex-patternIndex}`); + patternIndex = lps[patternIndex - 1]; + found = true; + } else if (textIndex < textLength && pattern.charAt(patternIndex) !== text.charAt(textIndex)) { + /* * + * else if there is still text left to iterate over and the pattern character does not match + * the text character at their respective index positions, then check if patternIndex is 0, + * i.e. if it is the first pattern position. If so then jump to the next text character, else + * (this is not the first pattern position), then update the pattern index using the generated + * longest prefix suffix, to skip ahead of matching values. This logic will only be encountered + * after T number of mismatches. + * */ + if (patternIndex === 0) textIndex += 1; + else patternIndex = lps[patternIndex - 1]; + } + } + // Pattern has not been found, return false. Else return true. 
+ if (!found) { + // console.log('The pattern was not found!') + return false; + } + return true; +}; - module.exports = { - KMPSearch - }; +module.exports = { + KMPSearch, +}; diff --git a/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js b/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js index 770bda85..4837f6c6 100644 --- a/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js +++ b/src/_Classics_/knuth-morris-pratt/knuth-morris-pratt.test.js @@ -1,7 +1,7 @@ const { KMPSearch } = require('.'); describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => { - describe('KMPSearch', () =>{ + describe('KMPSearch', () => { it('Should return true when the pattern equals the text', () => { expect(KMPSearch('A', 'A')).toEqual(true); }); @@ -36,4 +36,4 @@ describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => { expect(KMPSearch('AAAAAAAA', 'AAAAAA')).toEqual(false); }); }); -}); \ No newline at end of file +}); From c1b47dc280c82e772fa55d39663d3e4782188121 Mon Sep 17 00:00:00 2001 From: Jonathan McChesney Date: 2019年10月23日 21:24:21 +0100 Subject: [PATCH 4/4] Update example in preamble --- src/_Classics_/knuth-morris-pratt/index.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/_Classics_/knuth-morris-pratt/index.js b/src/_Classics_/knuth-morris-pratt/index.js index 5a9c16c1..744fe1e4 100644 --- a/src/_Classics_/knuth-morris-pratt/index.js +++ b/src/_Classics_/knuth-morris-pratt/index.js @@ -1,9 +1,9 @@ /* * * The time complexity of KMP algorithm is O(n) in the worst case -* Example use case: Pattern = AABCAB Text = AAABACABAABCABAABCA -* LPSArray = [ 0, 0, 1, 2, 3, 0 ] -* Found = true, at position 13 +* Example use case: Pattern = ABCABCACA Text = AAABCBAABCABCACACABBCA +* LPSArray = [ 0, 0, 0, 1, 2, 3, 4, 0, 1 ] +* Found = true, at index 7 * */ // Longest prefix suffix - generate an array of the lps for each pattern array value

AltStyle によって変換されたページ (->オリジナル) /