1
+ /*
2
+ * The MIT License
3
+ *
4
+ * Copyright 2015 Thibault Debatty.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to deal
8
+ * in the Software without restriction, including without limitation the rights
9
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ * copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ * THE SOFTWARE.
23
+ */
24
+ package info .debatty .java .stringsimilarity ;
25
+
26
+ import info .debatty .java .stringsimilarity .interfaces .NormalizedStringSimilarity ;
27
+ import info .debatty .java .stringsimilarity .interfaces .NormalizedStringDistance ;
28
+ import java .util .*;
29
+
30
+ import net .jcip .annotations .Immutable ;
31
+
32
+ /**
33
+ * Ratcliff/Obershelp pattern recognition
34
+ * The Ratcliff/Obershelp algorithm computes the similarity of two strings a
35
+ * the doubled number of matching characters divided by the total number of
36
+ * characters in the two strings. Matching characters are those in the longest
37
+ * common subsequence plus, recursively, matching characters in the unmatched
38
+ * region on either side of the longest common subsequence.
39
+ * The Ratcliff/Obershelp distance is computed as 1 - Ratcliff/Obershelp similarity.
40
+ *
41
+ * @author Ligi https://github.com/dxpux (as a patch for fuzzystring)
42
+ * Ported to java from .net by denmase
43
+ */
44
+ @ Immutable
45
+ public class RatcliffObershelp implements
46
+ NormalizedStringSimilarity , NormalizedStringDistance {
47
+
48
+ /**
49
+ * Compute the Ratcliff-Obershelp similarity between strings.
50
+ *
51
+ * @param s1 The first string to compare.
52
+ * @param s2 The second string to compare.
53
+ * @return The RatcliffObershelp similarity in the range [0, 1]
54
+ * @throws NullPointerException if s1 or s2 is null.
55
+ */
56
+ public final double similarity (String source , String target ) {
57
+ if (source == null ) {
58
+ throw new NullPointerException ("source must not be null" );
59
+ }
60
+
61
+ if (target == null ) {
62
+ throw new NullPointerException ("target must not be null" );
63
+ }
64
+
65
+ if (source .equals (target )) {
66
+ return 1 ;
67
+ }
68
+
69
+ List <String > matches ; // = new ArrayList<>();
70
+ matches = getMatchQueue (source , target );
71
+ int sumOfMatches = 0 ;
72
+ Iterator it ;
73
+ it = matches .iterator ();
74
+
75
+ // Display element by element using Iterator
76
+ while (it .hasNext ()) {
77
+ String element = it .next ().toString ();
78
+ //System.out.println(element);
79
+ sumOfMatches += element .length ();
80
+ }
81
+ return 2.0d * sumOfMatches / (source .length () + target .length ());
82
+ }
83
+
84
+ /**
85
+ * Return 1 - similarity.
86
+ *
87
+ * @param s1 The first string to compare.
88
+ * @param s2 The second string to compare.
89
+ * @return 1 - similarity
90
+ * @throws NullPointerException if s1 or s2 is null.
91
+ */
92
+ public final double distance (final String s1 , final String s2 ) {
93
+ return 1.0 - similarity (s1 , s2 );
94
+ }
95
+
96
+ private static List <String > getMatchQueue (String source , String target ) {
97
+ List <String > list = new ArrayList <>();
98
+ String match = frontMaxMatch (source , target );
99
+ if (match .length () > 0 ) {
100
+ String frontSource = source .substring (0 , source .indexOf (match ));
101
+ String frontTarget = target .substring (0 , target .indexOf (match ));
102
+ List <String > frontQueue = getMatchQueue (frontSource , frontTarget );
103
+
104
+ String endSource = source .substring (source .indexOf (match ) + match .length ());
105
+ String endTarget = target .substring (target .indexOf (match ) + match .length ());
106
+ List <String > endQueue = getMatchQueue (endSource , endTarget );
107
+
108
+ list .add (match );
109
+ list .addAll (frontQueue );
110
+ list .addAll (endQueue );
111
+ }
112
+ return list ;
113
+ }
114
+
115
+ private static String frontMaxMatch (String firstString , String secondString ) {
116
+ int longest = 0 ;
117
+ String longestSubstring = "" ;
118
+
119
+ for (int i = 0 ; i < firstString .length (); ++i ) {
120
+ for (int j = i + 1 ; j <= firstString .length (); ++j ) {
121
+ String substring = firstString .substring (i , j );
122
+ if (secondString .contains (substring ) && substring .length () > longest ) {
123
+ longest = substring .length ();
124
+ longestSubstring = substring ;
125
+ }
126
+ }
127
+ }
128
+ return longestSubstring ;
129
+ }
130
+ }
0 commit comments