Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2c9c528

Browse files
committed
added java code
1 parent ec96b62 commit 2c9c528

File tree

2 files changed

+265
-0
lines changed

2 files changed

+265
-0
lines changed

‎lcs/StringDiff.java‎

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/** This class highlights the differences between two plain-text strings by generating an HTML fragment that shows the changes.
2+
3+
The buildLcsList() method recursively finds the longest substrings common to both text1 and text2, producing a list of longest common substrings that markTextDiff() uses to mark up the changes between text1 and text2.
4+
5+
The longest common substrings are found using the LongestCommonSubstring (LCS) algorithm credited to https://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/LongestCommonSubstring.java.html
6+
Credit goes to Robert Sedgewick and Kevin Wayne, who provide the LCS algorithm under the GNU General Public License.
7+
8+
Dependencies: SuffixArray.java
9+
10+
version 1.0, 10-11-2022, first release
11+
12+
*/
13+
package lcs;
14+
15+
import java.util.ArrayList;
16+
17+
public class StringDiff {
18+
19+
private static final String INSERT_COLOR = "#00ff66";
20+
private static final String DELETE_COLOR = "#ff9933";
21+
private static final int lcs_threshold = 3;//minimum threshold for longest common subsequence
22+
23+
public static void main(String[] args) {
24+
25+
String text1 = "Do not change this section. Please check any misqelling! Note that this section is obsolete.";
26+
String text2 = "New section added. Do not change this section. Please check any mispelling!";
27+
28+
ArrayList<String> lcsList = new ArrayList<>();
29+
buildLcsList(lcsList, text1, text2);
30+
31+
String result = markTextDiff(text1, text2, lcsList, INSERT_COLOR, DELETE_COLOR);
32+
System.out.println(result);
33+
}
34+
35+
//build list lcsl of valid longest common subsequences between text1 and text2
36+
public static void buildLcsList(ArrayList<String> lcsl, String text1, String text2) {
37+
String mLcs = lcs(text1, text2);
38+
if (mLcs.length() >= lcs_threshold) {
39+
int idx1 = text1.indexOf(mLcs);
40+
int idx2 = text2.indexOf(mLcs);
41+
buildLcsList(lcsl, text1.substring(0, idx1), text2.substring(0, idx2));
42+
lcsl.add(mLcs);
43+
buildLcsList(lcsl, text1.substring(idx1 + mLcs.length()), text2.substring(idx2 + mLcs.length()));
44+
}
45+
}
46+
47+
//highlights with htlm tags the changes from text1 to text2 using lcsList
48+
public static String markTextDiff(String text1, String text2,
49+
ArrayList<String> lcsList, String insertColor, String deleteColor) {
50+
StringBuilder stringBuilder = new StringBuilder();
51+
52+
int cur1 = 0, cur2 = 0;//cursors
53+
for (int k = 0; k < lcsList.size(); k++) {
54+
String mLcs = lcsList.get(k);
55+
int idx1 = text1.indexOf(mLcs, cur1);
56+
int idx2 = text2.indexOf(mLcs, cur2);
57+
if (idx1 > cur1) {
58+
stringBuilder.append("<del style='background-color:").append(deleteColor).append("'>").append(text1.substring(cur1, idx1)).append("</del>");
59+
}
60+
if (idx2 > cur2) {
61+
stringBuilder.append("<ins style='background-color:").append(insertColor).append("'>").append(text2.substring(cur2, idx2)).append("</ins>");
62+
}
63+
stringBuilder.append(lcsList.get(k));
64+
cur1 = idx1 + mLcs.length();
65+
cur2 = idx2 + mLcs.length();
66+
}
67+
if (cur1 < text1.length()) {
68+
stringBuilder.append("<del style='background-color:").append(deleteColor).append("'>").append(text1.substring(cur1)).append("</del>");
69+
}
70+
if (cur2 < text2.length()) {
71+
stringBuilder.append("<ins style='background-color:").append(insertColor).append("'>").append(text2.substring(cur2)).append("</ins>");
72+
}
73+
return stringBuilder.toString();
74+
}
75+
76+
77+
// return the longest common prefix of suffix s[p..] and suffix t[q..]
78+
private static String lcp(String s, int p, String t, int q) {
79+
int n = Math.min(s.length() - p, t.length() - q);
80+
for (int i = 0; i < n; i++) {
81+
if (s.charAt(p + i) != t.charAt(q + i))
82+
return s.substring(p, p + i);
83+
}
84+
return s.substring(p, p + n);
85+
}
86+
87+
// compare suffix s[p..] and suffix t[q..]
88+
private static int compare(String s, int p, String t, int q) {
89+
int n = Math.min(s.length() - p, t.length() - q);
90+
for (int i = 0; i < n; i++) {
91+
if (s.charAt(p + i) != t.charAt(q + i))
92+
return s.charAt(p + i) - t.charAt(q + i);
93+
}
94+
return Integer.compare(s.length() - p, t.length() - q);
95+
}
96+
97+
/**
98+
* Returns the longest common string of the two specified strings.
99+
*
100+
* @param s one string
101+
* @param t the other string
102+
* @return the longest common string that appears as a substring
103+
* in both <tt>s</tt> and <tt>t</tt>; the empty string
104+
* if no such string
105+
*/
106+
public static String lcs(String s, String t) {
107+
SuffixArray suffix1 = new SuffixArray(s);
108+
SuffixArray suffix2 = new SuffixArray(t);
109+
110+
// find longest common substring by "merging" sorted suffixes
111+
String lcs = "";
112+
int i = 0, j = 0;
113+
while (i < s.length() && j < t.length()) {
114+
int p = suffix1.index(i);
115+
int q = suffix2.index(j);
116+
String x = lcp(s, p, t, q);
117+
if (x.length() > lcs.length()) lcs = x;
118+
if (compare(s, p, t, q) < 0)
119+
i++;
120+
else j++;
121+
}
122+
return lcs;
123+
}
124+
125+
}

‎lcs/SuffixArray.java‎

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/**
2+
Suffix array is based on https://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/SuffixArray.java
3+
Credit goes to Robert Sedgewick and Kevin Wayne, who provide this class under the GNU General Public License.
4+
*/
5+
6+
package lcs;
7+
import java.util.Arrays;
8+
9+
public class SuffixArray {

    /** All suffixes of the input text, in ascending order. */
    private final Suffix[] suffixes;

    /**
     * Builds the sorted suffix array of the given text.
     *
     * @param text the input string
     */
    public SuffixArray(String text) {
        final int count = text.length();
        Suffix[] sorted = new Suffix[count];
        int start = 0;
        while (start < count) {
            sorted[start] = new Suffix(text, start);
            start++;
        }
        Arrays.sort(sorted);
        this.suffixes = sorted;
    }

    /** A suffix of {@code text}, represented by its start offset rather than a copy. */
    private static class Suffix implements Comparable<Suffix> {
        private final String text;
        private final int index;

        private Suffix(String text, int index) {
            this.text = text;
            this.index = index;
        }

        // number of characters in this suffix
        private int length() {
            return text.length() - index;
        }

        // i-th character of this suffix
        private char charAt(int i) {
            return text.charAt(index + i);
        }

        /**
         * Orders suffixes by their first differing character; when one suffix is a
         * prefix of the other, the shorter one comes first.
         */
        @Override
        public int compareTo(Suffix that) {
            if (this == that) {
                return 0; // same object, nothing to scan
            }
            int shared = Math.min(this.length(), that.length());
            for (int k = 0; k < shared; k++) {
                char mine = this.charAt(k);
                char theirs = that.charAt(k);
                if (mine != theirs) {
                    return mine < theirs ? -1 : +1;
                }
            }
            return this.length() - that.length();
        }

        @Override
        public String toString() {
            return text.substring(index);
        }
    }

    /**
     * Returns the length of the input string.
     *
     * @return the length of the input string
     */
    public int length() {
        return suffixes.length;
    }

    /**
     * Returns the start offset, in the original string, of the <em>i</em>th smallest
     * suffix. That is, {@code text.substring(sa.index(i))} is the <em>i</em>th
     * smallest suffix.
     *
     * @param i an integer between 0 and <em>N</em>-1
     * @return the offset into the original string of the <em>i</em>th smallest suffix
     * @throws java.lang.IndexOutOfBoundsException unless 0 &le; <em>i</em> &lt; <em>N</em>
     */
    public int index(int i) {
        if (!(i >= 0 && i < suffixes.length)) {
            throw new IndexOutOfBoundsException();
        }
        Suffix chosen = suffixes[i];
        return chosen.index;
    }

    /**
     * Returns the length of the longest common prefix of the <em>i</em>th smallest
     * suffix and the <em>i</em>-1st smallest suffix.
     *
     * @param i an integer between 1 and <em>N</em>-1
     * @return the length of the longest common prefix of the two adjacent suffixes
     * @throws java.lang.IndexOutOfBoundsException unless 1 &le; <em>i</em> &lt; <em>N</em>
     */
    public int lcp(int i) {
        if (!(i >= 1 && i < suffixes.length)) {
            throw new IndexOutOfBoundsException();
        }
        return lcp(suffixes[i], suffixes[i - 1]);
    }

    // number of leading characters on which s and t agree
    private static int lcp(Suffix s, Suffix t) {
        int limit = Math.min(s.length(), t.length());
        int matched = 0;
        while (matched < limit && s.charAt(matched) == t.charAt(matched)) {
            matched++;
        }
        return matched;
    }

    /**
     * Returns the <em>i</em>th smallest suffix as a string.
     *
     * @param i the index
     * @return the <em>i</em>th smallest suffix as a string
     * @throws java.lang.IndexOutOfBoundsException unless 0 &le; <em>i</em> &lt; <em>N</em>
     */
    public String select(int i) {
        if (!(i >= 0 && i < suffixes.length)) {
            throw new IndexOutOfBoundsException();
        }
        Suffix chosen = suffixes[i];
        return chosen.toString();
    }

    /**
     * Returns the number of suffixes strictly less than the {@code query} string.
     * Note that {@code rank(select(i))} equals {@code i} for each {@code i}
     * between 0 and <em>N</em>-1.
     *
     * @param query the query string
     * @return the number of suffixes strictly less than {@code query}
     */
    public int rank(String query) {
        int low = 0;
        int high = suffixes.length - 1;
        // binary search over the sorted suffixes
        while (low <= high) {
            int middle = low + (high - low) / 2;
            int order = compare(query, suffixes[middle]);
            if (order == 0) {
                return middle;
            }
            if (order < 0) {
                high = middle - 1;
            } else {
                low = middle + 1;
            }
        }
        return low;
    }

    // compare query string to suffix, using the same ordering as Suffix.compareTo
    private static int compare(String query, Suffix suffix) {
        int shared = Math.min(query.length(), suffix.length());
        for (int k = 0; k < shared; k++) {
            char q = query.charAt(k);
            char s = suffix.charAt(k);
            if (q != s) {
                return q < s ? -1 : +1;
            }
        }
        return query.length() - suffix.length();
    }

}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /