|
def kmp_search(pat, txt):
    """Return the start index of every occurrence of ``pat`` in ``txt``.

    Implements Knuth-Morris-Pratt matching: the pattern is preprocessed into
    an ``lps`` (longest proper prefix that is also a suffix) table, which
    lets the scan fall back inside the pattern after a mismatch without
    ever moving the text index backwards. Overlapping occurrences are
    reported.

    Args:
        pat: Pattern to look for; must support ``len()`` and integer indexing.
        txt: Text to search; must support ``len()`` and integer indexing.

    Returns:
        A list of 0-based start indices in ascending order. An empty pattern
        matches at every position ``0..len(txt)`` (the same convention
        ``str.find`` uses for an empty substring). A pattern longer than the
        text yields an empty list.
    """
    M = len(pat)
    N = len(txt)

    # An empty pattern has no lps table to build and no pat[0] to compare,
    # so it must be answered before any indexing happens.
    if M == 0:
        return list(range(N + 1))

    # A pattern longer than the text cannot occur; skip the preprocessing.
    if M > N:
        return []

    # lps[k] = length of the longest proper prefix of pat[0..k] that is
    # also a suffix of pat[0..k].
    lps = [0] * M
    computeLPSArray(pat, M, lps)

    indices = []
    i = 0  # index into txt
    j = 0  # index into pat == number of pattern characters matched so far
    while i < N:
        if pat[j] == txt[i]:
            i += 1
            j += 1
            if j == M:
                # Full match ending just before i; keep the longest border
                # so an overlapping occurrence can still be found.
                indices.append(i - M)
                j = lps[M - 1]
        elif j != 0:
            # Mismatch after j matches: the first lps[j - 1] characters are
            # already known to match, so resume from there without moving i.
            j = lps[j - 1]
        else:
            # Mismatch on the very first pattern character: advance the text.
            i += 1

    return indices
| 34 | + |
def computeLPSArray(pat, M, lps):
    """Fill ``lps`` in place with the KMP prefix table for ``pat``.

    After the call, ``lps[i]`` (for ``0 <= i < M``) is the length of the
    longest proper prefix of ``pat[0..i]`` that is also a suffix of
    ``pat[0..i]``.

    Args:
        pat: Pattern; must support integer indexing for positions ``0..M-1``.
        M: Number of leading pattern elements to process.
        lps: Mutable sequence with at least ``M`` slots; entries ``0..M-1``
            are overwritten.

    Returns:
        None. The result is written into ``lps``. When ``M`` is zero or
        negative there is nothing to compute and ``lps`` is left untouched.
    """
    # Guard the empty pattern: writing lps[0] on an empty table would raise.
    if M <= 0:
        return

    prefix_len = 0  # length of the previous longest prefix-suffix

    lps[0] = 0  # a single character has no proper prefix
    i = 1

    # Compute lps[i] for i = 1 .. M-1.
    while i < M:
        if pat[i] == pat[prefix_len]:
            # The current border extends by one character.
            prefix_len += 1
            lps[i] = prefix_len
            i += 1
        elif prefix_len != 0:
            # Fall back to the next shorter border and retry the same i
            # (e.g. "AAACAAAA" at i = 7); i is deliberately not advanced.
            prefix_len = lps[prefix_len - 1]
        else:
            # No border can be extended at this position.
            lps[i] = 0
            i += 1
| 58 | + |
| 59 | + |
0 commit comments