I would like any ideas or suggestions on how to enhance the code for rolling hash algorithm and make if faster. Even if it is a change in one line. I tried to add a list, because I thought it would be more efficient, but it didn't work properly. So any ideas on how to enhance it. I would appreciate the assist.
import time
class RollingHash:
def __init__(self, text, sizeWord):
self.text = text
self.hash = 0
self.sizeWord = sizeWord
for i in range(0, sizeWord):
# ord maps the character to a number
# subtract out the ASCII value of "a" to start the indexing at zero
self.hash += (ord(self.text[i]) - ord("a") + 1) * (26 ** (sizeWord - i - 1)) # change the first / with *
# start index of current window
self.window_start = 0
# end of index window
self.window_end = sizeWord
def move_window(self):
if self.window_end <= len(self.text) - 1:
# remove left letter from hash value
self.hash -= (ord(self.text[self.window_start]) - ord("a") + 1) * 26 ** (self.sizeWord - 1)
self.hash *= 26
self.hash += ord(self.text[self.window_end]) - ord("a") + 1
self.window_start += 1
self.window_end += 1
def window_text(self):
return self.text[self.window_start:self.window_end]
def rabin_karp(word, text):
if word == "" or text == "":
return None
if len(word) > len(text):
return None
rolling_hash = RollingHash(text, len(word))
word_hash = RollingHash(word, len(word))
# word_hash.move_window()
for i in range(len(text) - len(word) + 1):
if rolling_hash.hash == word_hash.hash:
if rolling_hash.window_text() == word:
print(i)
rolling_hash.move_window()
return 'Pattern length: ' , len(word)
text = input("Text: ")
word = input("Pattern: ")
t1 = time.clock()
results = rabin_karp(word, text)
print(results)
t2 = time.clock()
print('Run Time: %0.5f ms' % ((t2 - t1) * 1000.0))
1 Answer 1
You can do as a decent compiler would: precompute some expressions, and reduce the "strength" of others.
For worse rather than better, most opportunities for this are in the constructor.
Part of the performance of on object depends on its interface - I've included an alternative function that finds the (position of the) next match, if any.
Benchmarking is difficult, micro or not: beware "printing" and use help.
import timeit
class RollingHash:
'''A rolling hash for a window of constant length into a text,
both specified at construction.
'''
adjust = ord("a") - 1
alphabet = 26
def __init__(self, text, size_word):
'''Set up a rolling hash for a window of size_word into text.'''
self.text = text
if len(text) < size_word:
self.hash = None
return
rk = 0
for c in text[:size_word]:
rk = rk * self.alphabet + ord(c) - self.adjust
self.hash = rk
self.pos = -1
self.window_start = 0
self.window_end = size_word
self.multiplier = RollingHash.alphabet ** (size_word - 1)
def move_window(self):
'''Advance window by one position.'''
if self.window_end < len(self.text):
# remove left letter from hash value
self.hash = \
(self.hash - (ord(self.text[self.window_start])
- RollingHash.adjust) * self.multiplier) \
* RollingHash.alphabet \
+ ord(self.text[self.window_end]) - RollingHash.adjust
self.window_start += 1
self.window_end += 1
def window_text(self):
'''Return current window text.'''
return self.text[self.window_start:self.window_end]
def match(self, other):
'''Return position of next match, or none.'''
roll = self.hash
text = self.text
# "local copies" may help or hinder readability and performance
start = self.window_start
end = self.window_end
limit = len(self.text)
result = None
while end < limit:
if self.pos < other.hash == roll \
and other.text == text[start:end] \
and self.pos < start:
result = self.pos = start
break;
roll = (roll - (ord(text[start])
- RollingHash.adjust) * self.multiplier) \
* RollingHash.alphabet \
+ ord(text[end]) - RollingHash.adjust
start += 1
end += 1
self.window_start = start
self.window_end = end
return result
verbose = True
def rabin_karp(word, text):
'''Print indexes of matches for word in text.'''
if word == "" or text == "":
return None
size_word = len(word)
if size_word > len(text):
return None
rolling_hash = RollingHash(text, size_word)
word_hash = RollingHash(word, size_word)
for i in range(len(text) - size_word + 1):
if rolling_hash.hash == word_hash.hash:
if rolling_hash.window_text() == word:
if verbose:
print(i)
else:
print(rolling_hash.window_text(), '<>', word, "at", i)
rolling_hash.move_window()
return 'Pattern length: ', size_word
def karp_rabin(word, text):
'''Print indexes of matches for word in text.'''
size_word = len(word)
if not 0 < size_word <= len(text):
return None
rolling_hash = RollingHash(text, size_word)
word_hash = RollingHash(word, size_word)
while True:
position = rolling_hash.match(word_hash)
if position is None:
return 'Pattern length: ', size_word
if verbose:
print(position)
if __name__ == '__main__':
text = input("Text: ")
word = input("Pattern: ")
print(rabin_karp(word, text))
print(karp_rabin(word, text))
verbose = False
glob = globals()
# have a look at timeit.Timer.repeat() and autorange(), too
print(timeit.timeit('results = rabin_karp(word, text)',
globals=glob, number=9999))
print(timeit.timeit('results = karp_rabin(word, text)',
globals=glob, number=9999))
-
\$\begingroup\$ Thank you so much! , i compared the two codes run time, and it's faster \$\endgroup\$user151942– user1519422017年10月26日 15:52:59 +00:00Commented Oct 26, 2017 at 15:52
-
\$\begingroup\$ Things. take. time. \$\endgroup\$greybeard– greybeard2017年10月26日 22:43:18 +00:00Commented Oct 26, 2017 at 22:43
print()
.) \$\endgroup\$