22# -*- coding: utf-8 -*-
33
44
5- import stream as sm
6- 7- 8- def damerau_levenshtein_distance (
9- s1 ,
10- s2 ,
11- deletion_cost = 1 ,
12- insertion_cost = 1 ,
13- substitution_cost = 1 ,
14- transposition_cost = 1
15- ):
16- d = {}
17- lenstr1 = len (s1 )
18- lenstr2 = len (s2 )
19- for i in range (- 1 ,lenstr1 + 1 ):
20- d [(i ,- 1 )] = (i + 1 ) * deletion_cost
21- for j in range (- 1 ,lenstr2 + 1 ):
22- d [(- 1 ,j )] = min (j + 1 , 1 ) * insertion_cost
23- 24- for i in range (lenstr1 ):
25- best_before_insertion_cost = d [(i ,- 1 )]
26- for j in range (lenstr2 ):
27- best_before_insertion_cost = min (best_before_insertion_cost , d [(i ,j - 1 )])
28- if s1 [i ] == s2 [j ]:
29- cost = 0
30- else :
31- cost = 1
32- d [(i ,j )] = min (
33- d [(i - 1 ,j )] + deletion_cost , # deletion
34- best_before_insertion_cost + insertion_cost , # insertion
35- d [(i - 1 ,j - 1 )] + cost * substitution_cost , # substitution
36- )
37- if i and j and s1 [i ]== s2 [j - 1 ] and s1 [i - 1 ] == s2 [j ]:
38- d [(i ,j )] = min (d [(i ,j )], d [i - 2 ,j - 2 ] + cost * transposition_cost ) # transposition
39- 40- return d [lenstr1 - 1 ,lenstr2 - 1 ]
41- 42- 435def levenshtein (
446 s1 ,
457 s2 ,
@@ -55,7 +17,7 @@ def levenshtein(
5517 if not a :
5618 a .append ([i * insertion_cost for i in range (len (s2 ) + 1 )])
5719
58- for i , c1 in enumerate (s1 )>> sm . drop ( len (a ) - 1 ) :
20+ for i , c1 in list ( enumerate (s1 ))[ len (a ) - 1 :] :
5921 previous_row = i
6022 current_row = i + 1
6123 a .append ([0 for i in range (len (s2 ) + 1 )])
0 commit comments