-
Notifications
You must be signed in to change notification settings - Fork 269
KMP classic algorithm and unit tests #125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
|
||
/* * | ||
* The time complexity of the KMP algorithm is O(n + m) in the worst case, where n is the text length and m is the pattern length | ||
* Example use case: Pattern = ABCABCACA Text = AAABCBAABCABCACACABBCA | ||
* LPSArray = [ 0, 0, 0, 1, 2, 3, 4, 0, 1 ] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be LPSArray = [ 0, 1, 0, 0, 1, 0 ] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @TheSTL, the pattern has been updated to ABCABCACA. I feel this is a more intuitive example. I believe [ 0, 0, 0, 1, 2, 3, 4, 0, 1 ] to be the correct LPS; if there are any more queries or amendments you would like to be made, I’ll update the PR as appropriate. Cheers. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this example is more understandable than the previous one for computing the LPS. |
||
* Found = true, at index 7 | ||
* */ | ||
|
||
/**
 * Build the "longest prefix suffix" (LPS) table used by the KMP search.
 *
 * Entry `i` of the table is the length of the longest proper prefix of
 * `pattern.slice(0, i + 1)` that is also a suffix of that same slice.
 * Example: 'ABCABCACA' -> [ 0, 0, 0, 1, 2, 3, 4, 0, 1 ].
 *
 * @param {string} pattern - The pattern to preprocess.
 * @param {number} [patternLength=pattern.length] - Number of leading pattern
 *   characters to process.
 * @returns {number[]} The LPS table, one entry per processed character; an
 *   empty array when there is nothing to process.
 */
const createLPS = (pattern, patternLength = pattern.length) => {
  // An empty pattern has no prefixes at all, so its table has no entries.
  if (!(patternLength > 0)) return [];

  // A single character has no proper prefix, so the first entry is always 0.
  // The remaining entries are appended in index order by the loop below.
  const lps = [0];

  // Length of the prefix/suffix currently being extended.
  let length = 0;
  let i = 1;

  while (i < patternLength) {
    if (pattern.charAt(i) === pattern.charAt(length)) {
      // The current prefix/suffix can be extended by one character.
      length += 1;
      lps[i] = length;
      i += 1;
    } else if (length !== 0) {
      // Mismatch: fall back to the next shorter candidate prefix without
      // advancing i, so the same character is compared again.
      length = lps[length - 1];
    } else {
      // No prefix ends here; record 0 and move on.
      lps[i] = 0;
      i += 1;
    }
  }
  return lps;
};
|
||
/**
 * Knuth-Morris-Pratt substring search: report whether `pattern` occurs
 * anywhere within `text`.
 *
 * A precomputed longest-prefix-suffix table (see createLPS) lets the search
 * resume from the longest partial match after a mismatch, so the text index
 * never moves backwards. Runs in O(n + m) time, where n is the text length
 * and m is the pattern length.
 *
 * @param {string} pattern - The string to look for.
 * @param {string} text - The string to search in.
 * @returns {boolean} true if the pattern is a substring of the text, else
 *   false. The empty pattern is a substring of every text, including the
 *   empty text.
 */
const KMPSearch = (pattern, text) => {
  const patternLength = pattern.length; // Often referred to as M
  const textLength = text.length; // Often referred to as N

  // The empty pattern trivially matches; this also avoids reading lps[-1].
  if (patternLength === 0) return true;
  // A pattern longer than the text can never fit inside it.
  if (patternLength > textLength) return false;

  // Preprocessed fallback table for the pattern.
  const lps = createLPS(pattern, patternLength);

  let patternIndex = 0; // Referred to as P
  let textIndex = 0; // Referred to as T

  while (textIndex < textLength) {
    if (pattern.charAt(patternIndex) === text.charAt(textIndex)) {
      textIndex += 1;
      patternIndex += 1;
      // Every pattern character has been matched: the pattern was found.
      if (patternIndex === patternLength) return true;
    } else if (patternIndex === 0) {
      // Mismatch on the first pattern character: try the next text character.
      textIndex += 1;
    } else {
      // Mismatch after a partial match: keep the text position and resume
      // from the longest prefix of the pattern that is still matched.
      patternIndex = lps[patternIndex - 1];
    }
  }
  return false;
};
|
||
// Public API of this module: only the search entry point is exported.
module.exports = { KMPSearch };
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
const { KMPSearch } = require('.'); | ||
|
||
describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => {
  describe('KMPSearch', () => {
    // Each row is: [test title, pattern, text, expected result].
    const cases = [
      ['Should return true when the pattern equals the text', 'A', 'A', true],
      [
        'Should return true when the pattern is a single character and is contained within the text',
        'S',
        'TEST',
        true,
      ],
      [
        'Should return true when the pattern is multiple characters and in the middle of the text',
        'WORLD',
        'TESTWORLDTEST',
        true,
      ],
      [
        'Should return true when the pattern is present multiple times within the text',
        'ST',
        'TESTWORLDTEST',
        true,
      ],
      [
        'Should return true when the pattern is a single character and is present at the start of the text',
        'A',
        'ABABABAABCABCABC',
        true,
      ],
      [
        'Should return true when the pattern is multiple characters and is present at the start of the text',
        'AB',
        'ABABABAABCABCABC',
        true,
      ],
      [
        'Should return true when the pattern contains repeating characters, and is present in the middle of the text',
        'AAABAAAA',
        'AAAAAAAAAAABAAAAAA',
        true,
      ],
      [
        'Should return true when the pattern is contained within the text and the pattern contains non alphabetic characters',
        'AAA123! ',
        'AAAAAA123! AAAAABAAAAAA',
        true,
      ],
      ['Should return false when the pattern does not equal the text', 'A', 'B', false],
      [
        'Should return false when the pattern is not contained within the text',
        'AD',
        'ABABABAABCABCABC',
        false,
      ],
      [
        'Should return false when the pattern is longer than the text',
        'AAAAAAAA',
        'AAAAAA',
        false,
      ],
    ];

    // Register one test per row, in the order listed above.
    cases.forEach(([title, pattern, text, expected]) => {
      it(title, () => {
        expect(KMPSearch(pattern, text)).toEqual(expected);
      });
    });
  });
});