Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7b8fc82

Browse files
authored
Merge pull request knaxus#125 from jonathanmcchesney/master
KMP classic algorithm and unit tests
2 parents 1aaa484 + c1b47dc commit 7b8fc82

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
2+
/* *
 * The time complexity of the KMP algorithm is O(n + m) in the worst case, where n is the
 * length of the text and m is the length of the pattern.
 * Example use case: Pattern = ABCABCACA Text = AAABCBAABCABCACACABBCA
 * LPSArray = [ 0, 0, 0, 1, 2, 3, 4, 0, 1 ]
 * Found = true, at index 7
 * */
8+
9+
/**
 * Builds the "longest proper prefix that is also a suffix" (LPS) table for a pattern.
 *
 * lps[i] is the length of the longest proper prefix of pattern[0..i] that is also a
 * suffix of pattern[0..i]. Example: 'ABCABCACA' -> [0, 0, 0, 1, 2, 3, 4, 0, 1].
 *
 * @param {string} pattern - the pattern to preprocess
 * @param {number} [patternLength=pattern.length] - how many leading characters of the
 *   pattern to process
 * @returns {number[]} the LPS table with one entry per processed character (an empty
 *   array for an empty pattern)
 */
const createLPS = (pattern, patternLength = pattern.length) => {
  // One zero-filled slot per pattern character. Note that the literal `[patternLength]`
  // would create a single-element array holding the length, not an array of that size.
  const lps = Array.from({ length: patternLength }, () => 0);

  // length: size of the prefix-suffix currently being extended; i: position being filled.
  let length = 0;
  let i = 1;

  // lps[0] is always 0, so calculate the entries for i = 1 to patternLength - 1
  while (i < patternLength) {
    if (pattern.charAt(i) === pattern.charAt(length)) {
      // the current prefix-suffix can be extended by one character
      length += 1;
      lps[i] = length;
      i += 1;
    } else if (length !== 0) {
      // mismatch: fall back to the next shorter candidate prefix-suffix and retry the
      // same position i (i is deliberately not advanced here)
      length = lps[length - 1];
    } else {
      // mismatch with no candidate left: no prefix-suffix ends at position i
      lps[i] = 0;
      i += 1;
    }
  }
  return lps;
};
40+
41+
/**
 * Knuth-Morris-Pratt pattern matching: reports whether `pattern` occurs as a contiguous
 * substring of `text`.
 *
 * A precomputed prefix-suffix (LPS) table lets the search resume after a mismatch without
 * ever moving backwards in the text, giving O(n + m) time in the worst case, where n is
 * the text length and m is the pattern length.
 *
 * @param {string} pattern - the string to look for
 * @param {string} text - the string to search in
 * @returns {boolean} true if the pattern occurs in the text, else false. An empty pattern
 *   is considered to occur in every text, including the empty text.
 */
const KMPSearch = (pattern, text) => {
  const patternLength = pattern.length; // Often referred to as M
  const textLength = text.length; // Often referred to as N

  // The empty string is a substring of every string (same as String.prototype.includes).
  // Handling it here also avoids indexing the LPS table at position -1.
  if (patternLength === 0) return true;
  // A pattern longer than the text can never match.
  if (patternLength > textLength) return false;

  // Longest Prefix Suffix - array containing the lps for all pattern value positions
  const lps = createLPS(pattern, patternLength);

  let patternIndex = 0; // Referred to as P
  let textIndex = 0; // Referred to as T

  // While there is still text left to iterate over
  while (textIndex < textLength) {
    if (pattern.charAt(patternIndex) === text.charAt(textIndex)) {
      // characters match: advance both cursors
      textIndex += 1;
      patternIndex += 1;
      // the whole pattern has been matched, ending just before textIndex
      if (patternIndex === patternLength) return true;
    } else if (patternIndex === 0) {
      // mismatch on the first pattern character: move on to the next text character
      textIndex += 1;
    } else {
      // mismatch after a partial match: reuse the longest prefix of the pattern that is
      // also a suffix of what was matched so far, keeping the text cursor where it is
      patternIndex = lps[patternIndex - 1];
    }
  }

  // The text was exhausted without a full match
  return false;
};
97+
98+
// Public API of this module: only the search entry point is exposed.
module.exports = { KMPSearch };
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
const { KMPSearch } = require('.');
2+
3+
describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => {
  describe('KMPSearch', () => {
    // Table of scenarios: one spec is registered per entry, in this order.
    const scenarios = [
      {
        title: 'Should return true when the pattern equals the text',
        pattern: 'A',
        text: 'A',
        expected: true,
      },
      {
        title: 'Should return true when the pattern is a single character and is contained within the text',
        pattern: 'S',
        text: 'TEST',
        expected: true,
      },
      {
        title: 'Should return true when the pattern is multiple characters and in the middle of the text',
        pattern: 'WORLD',
        text: 'TESTWORLDTEST',
        expected: true,
      },
      {
        title: 'Should return true when the pattern is present multiple times within the text',
        pattern: 'ST',
        text: 'TESTWORLDTEST',
        expected: true,
      },
      {
        title: 'Should return true when the pattern is a single character and is present at the start of the text',
        pattern: 'A',
        text: 'ABABABAABCABCABC',
        expected: true,
      },
      {
        title: 'Should return true when the pattern is multiple characters and is present at the start of the text',
        pattern: 'AB',
        text: 'ABABABAABCABCABC',
        expected: true,
      },
      {
        title: 'Should return true when the pattern contains repeating characters, and is present in the middle of the text',
        pattern: 'AAABAAAA',
        text: 'AAAAAAAAAAABAAAAAA',
        expected: true,
      },
      {
        title: 'Should return true when the pattern is contained within the text and the pattern contains non alphabetic characters',
        pattern: 'AAA123! ',
        text: 'AAAAAA123! AAAAABAAAAAA',
        expected: true,
      },
      {
        title: 'Should return false when the pattern does not equal the text',
        pattern: 'A',
        text: 'B',
        expected: false,
      },
      {
        title: 'Should return false when the pattern is not contained within the text',
        pattern: 'AD',
        text: 'ABABABAABCABCABC',
        expected: false,
      },
      {
        title: 'Should return false when the pattern is longer than the text',
        pattern: 'AAAAAAAA',
        text: 'AAAAAA',
        expected: false,
      },
    ];

    scenarios.forEach(({ title, pattern, text, expected }) => {
      it(title, () => {
        expect(KMPSearch(pattern, text)).toEqual(expected);
      });
    });
  });
});

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /