4
\$\begingroup\$

I would like any ideas or suggestions on how to enhance the code for rolling hash algorithm and make if faster. Even if it is a change in one line. I tried to add a list, because I thought it would be more efficient, but it didn't work properly. So any ideas on how to enhance it. I would appreciate the assist.

import time
class RollingHash:
def __init__(self, text, sizeWord):
 self.text = text
 self.hash = 0
 self.sizeWord = sizeWord
 for i in range(0, sizeWord):
 # ord maps the character to a number
 # subtract out the ASCII value of "a" to start the indexing at zero
 self.hash += (ord(self.text[i]) - ord("a") + 1) * (26 ** (sizeWord - i - 1)) # change the first / with *
 # start index of current window
 self.window_start = 0
 # end of index window
 self.window_end = sizeWord
def move_window(self):
 if self.window_end <= len(self.text) - 1:
 # remove left letter from hash value
 self.hash -= (ord(self.text[self.window_start]) - ord("a") + 1) * 26 ** (self.sizeWord - 1)
 self.hash *= 26
 self.hash += ord(self.text[self.window_end]) - ord("a") + 1
 self.window_start += 1
 self.window_end += 1
def window_text(self):
 return self.text[self.window_start:self.window_end]
def rabin_karp(word, text):
if word == "" or text == "":
 return None
if len(word) > len(text):
 return None
rolling_hash = RollingHash(text, len(word))
word_hash = RollingHash(word, len(word))
# word_hash.move_window()
for i in range(len(text) - len(word) + 1):
 if rolling_hash.hash == word_hash.hash:
 if rolling_hash.window_text() == word:
 print(i)
 rolling_hash.move_window()
return 'Pattern length: ' , len(word)
text = input("Text: ")
word = input("Pattern: ")
t1 = time.clock()
results = rabin_karp(word, text)
print(results)
t2 = time.clock()
print('Run Time: %0.5f ms' % ((t2 - t1) * 1000.0))
Phrancis
20.5k6 gold badges69 silver badges155 bronze badges
asked Oct 26, 2017 at 14:59
\$\endgroup\$
4
  • 2
    \$\begingroup\$ Welcome to Code Review! What Python version are you using? \$\endgroup\$ Commented Oct 26, 2017 at 15:32
  • \$\begingroup\$ (3.x, guessing from print().) \$\endgroup\$ Commented Oct 26, 2017 at 16:56
  • \$\begingroup\$ I am using version 3.6.2. \$\endgroup\$ Commented Oct 26, 2017 at 18:15
  • \$\begingroup\$ See also: Karp-Rabin String Matching, Rabin Karp algorithm improvement for string contains A to Z only, and Text search in Python for an idea how to avoid adverse effects with longer words. \$\endgroup\$ Commented Oct 26, 2017 at 20:27

1 Answer 1

3
\$\begingroup\$

You can do as a decent compiler would: precompute some expressions, and reduce the "strength" of others.
For worse rather than better, most opportunities for this are in the constructor.
Part of the performance of on object depends on its interface - I've included an alternative function that finds the (position of the) next match, if any.
Benchmarking is difficult, micro or not: beware "printing" and use help.

import timeit
class RollingHash:
 '''A rolling hash for a window of constant length into a text,
 both specified at construction.
 '''
 adjust = ord("a") - 1
 alphabet = 26
 def __init__(self, text, size_word):
 '''Set up a rolling hash for a window of size_word into text.'''
 self.text = text
 if len(text) < size_word:
 self.hash = None
 return
 rk = 0
 for c in text[:size_word]:
 rk = rk * self.alphabet + ord(c) - self.adjust
 self.hash = rk
 self.pos = -1
 self.window_start = 0
 self.window_end = size_word
 self.multiplier = RollingHash.alphabet ** (size_word - 1)
 def move_window(self):
 '''Advance window by one position.'''
 if self.window_end < len(self.text):
 # remove left letter from hash value
 self.hash = \
 (self.hash - (ord(self.text[self.window_start])
 - RollingHash.adjust) * self.multiplier) \
 * RollingHash.alphabet \
 + ord(self.text[self.window_end]) - RollingHash.adjust
 self.window_start += 1
 self.window_end += 1
 def window_text(self):
 '''Return current window text.'''
 return self.text[self.window_start:self.window_end]
 def match(self, other):
 '''Return position of next match, or none.'''
 roll = self.hash
 text = self.text
 # "local copies" may help or hinder readability and performance
 start = self.window_start
 end = self.window_end
 limit = len(self.text)
 result = None
 while end < limit:
 if self.pos < other.hash == roll \
 and other.text == text[start:end] \
 and self.pos < start:
 result = self.pos = start
 break;
 roll = (roll - (ord(text[start])
 - RollingHash.adjust) * self.multiplier) \
 * RollingHash.alphabet \
 + ord(text[end]) - RollingHash.adjust
 start += 1
 end += 1
 self.window_start = start
 self.window_end = end
 return result
verbose = True
def rabin_karp(word, text):
 '''Print indexes of matches for word in text.'''
 if word == "" or text == "":
 return None
 size_word = len(word)
 if size_word > len(text):
 return None
 rolling_hash = RollingHash(text, size_word)
 word_hash = RollingHash(word, size_word)
 for i in range(len(text) - size_word + 1):
 if rolling_hash.hash == word_hash.hash:
 if rolling_hash.window_text() == word:
 if verbose:
 print(i)
 else:
 print(rolling_hash.window_text(), '<>', word, "at", i)
 rolling_hash.move_window()
 return 'Pattern length: ', size_word
def karp_rabin(word, text):
 '''Print indexes of matches for word in text.'''
 size_word = len(word)
 if not 0 < size_word <= len(text):
 return None
 rolling_hash = RollingHash(text, size_word)
 word_hash = RollingHash(word, size_word)
 while True:
 position = rolling_hash.match(word_hash)
 if position is None:
 return 'Pattern length: ', size_word
 if verbose:
 print(position)
if __name__ == '__main__':
 text = input("Text: ")
 word = input("Pattern: ")
 print(rabin_karp(word, text))
 print(karp_rabin(word, text))
 verbose = False
 glob = globals()
 # have a look at timeit.Timer.repeat() and autorange(), too
 print(timeit.timeit('results = rabin_karp(word, text)',
 globals=glob, number=9999))
 print(timeit.timeit('results = karp_rabin(word, text)',
 globals=glob, number=9999))
answered Oct 26, 2017 at 15:40
\$\endgroup\$
2
  • \$\begingroup\$ Thank you so much! , i compared the two codes run time, and it's faster \$\endgroup\$ Commented Oct 26, 2017 at 15:52
  • \$\begingroup\$ Things. take. time. \$\endgroup\$ Commented Oct 26, 2017 at 22:43

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.