|
| 1 | +/** This class highlights the differences between two plain-text strings by generating an HTML fragment that marks up the changes. |
| 2 | + |
| 3 | +The buildLcsList() method recursively finds the longest substrings common to both text1 and text2, producing an ordered list of common substrings that markTextDiff() uses to mark up the changes between text1 and text2. |
| 4 | + |
| 5 | +The longest common substrings are found using the LongestCommonSubstring (LCS) algorithm credited to https://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/LongestCommonSubstring.java.html |
| 6 | +Credit goes to Robert Sedgewick and Kevin Wayne, who provide the LCS algorithm under the GNU General Public License. |
| 7 | + |
| 8 | +Dependencies: SuffixArray.java |
| 9 | + |
| 10 | +version 1.0, 10-11-2022, first release |
| 11 | + |
| 12 | +*/ |
| 13 | +package lcs; |
| 14 | + |
| 15 | +import java.util.ArrayList; |
| 16 | + |
| 17 | +public class StringDiff { |
| 18 | + |
| 19 | + private static final String INSERT_COLOR = "#00ff66"; |
| 20 | + private static final String DELETE_COLOR = "#ff9933"; |
| 21 | + private static final int lcs_threshold = 3;//minimum threshold for longest common subsequence |
| 22 | + |
| 23 | + public static void main(String[] args) { |
| 24 | + |
| 25 | + String text1 = "Do not change this section. Please check any misqelling! Note that this section is obsolete."; |
| 26 | + String text2 = "New section added. Do not change this section. Please check any mispelling!"; |
| 27 | + |
| 28 | + ArrayList<String> lcsList = new ArrayList<>(); |
| 29 | + buildLcsList(lcsList, text1, text2); |
| 30 | + |
| 31 | + String result = markTextDiff(text1, text2, lcsList, INSERT_COLOR, DELETE_COLOR); |
| 32 | + System.out.println(result); |
| 33 | + } |
| 34 | + |
| 35 | + //build list lcsl of valid longest common subsequences between text1 and text2 |
| 36 | + public static void buildLcsList(ArrayList<String> lcsl, String text1, String text2) { |
| 37 | + String mLcs = lcs(text1, text2); |
| 38 | + if (mLcs.length() >= lcs_threshold) { |
| 39 | + int idx1 = text1.indexOf(mLcs); |
| 40 | + int idx2 = text2.indexOf(mLcs); |
| 41 | + buildLcsList(lcsl, text1.substring(0, idx1), text2.substring(0, idx2)); |
| 42 | + lcsl.add(mLcs); |
| 43 | + buildLcsList(lcsl, text1.substring(idx1 + mLcs.length()), text2.substring(idx2 + mLcs.length())); |
| 44 | + } |
| 45 | + } |
| 46 | + |
| 47 | + //highlights with htlm tags the changes from text1 to text2 using lcsList |
| 48 | + public static String markTextDiff(String text1, String text2, |
| 49 | + ArrayList<String> lcsList, String insertColor, String deleteColor) { |
| 50 | + StringBuilder stringBuilder = new StringBuilder(); |
| 51 | + |
| 52 | + int cur1 = 0, cur2 = 0;//cursors |
| 53 | + for (int k = 0; k < lcsList.size(); k++) { |
| 54 | + String mLcs = lcsList.get(k); |
| 55 | + int idx1 = text1.indexOf(mLcs, cur1); |
| 56 | + int idx2 = text2.indexOf(mLcs, cur2); |
| 57 | + if (idx1 > cur1) { |
| 58 | + stringBuilder.append("<del style='background-color:").append(deleteColor).append("'>").append(text1.substring(cur1, idx1)).append("</del>"); |
| 59 | + } |
| 60 | + if (idx2 > cur2) { |
| 61 | + stringBuilder.append("<ins style='background-color:").append(insertColor).append("'>").append(text2.substring(cur2, idx2)).append("</ins>"); |
| 62 | + } |
| 63 | + stringBuilder.append(lcsList.get(k)); |
| 64 | + cur1 = idx1 + mLcs.length(); |
| 65 | + cur2 = idx2 + mLcs.length(); |
| 66 | + } |
| 67 | + if (cur1 < text1.length()) { |
| 68 | + stringBuilder.append("<del style='background-color:").append(deleteColor).append("'>").append(text1.substring(cur1)).append("</del>"); |
| 69 | + } |
| 70 | + if (cur2 < text2.length()) { |
| 71 | + stringBuilder.append("<ins style='background-color:").append(insertColor).append("'>").append(text2.substring(cur2)).append("</ins>"); |
| 72 | + } |
| 73 | + return stringBuilder.toString(); |
| 74 | + } |
| 75 | + |
| 76 | + |
| 77 | + // return the longest common prefix of suffix s[p..] and suffix t[q..] |
| 78 | + private static String lcp(String s, int p, String t, int q) { |
| 79 | + int n = Math.min(s.length() - p, t.length() - q); |
| 80 | + for (int i = 0; i < n; i++) { |
| 81 | + if (s.charAt(p + i) != t.charAt(q + i)) |
| 82 | + return s.substring(p, p + i); |
| 83 | + } |
| 84 | + return s.substring(p, p + n); |
| 85 | + } |
| 86 | + |
| 87 | + // compare suffix s[p..] and suffix t[q..] |
| 88 | + private static int compare(String s, int p, String t, int q) { |
| 89 | + int n = Math.min(s.length() - p, t.length() - q); |
| 90 | + for (int i = 0; i < n; i++) { |
| 91 | + if (s.charAt(p + i) != t.charAt(q + i)) |
| 92 | + return s.charAt(p + i) - t.charAt(q + i); |
| 93 | + } |
| 94 | + return Integer.compare(s.length() - p, t.length() - q); |
| 95 | + } |
| 96 | + |
| 97 | + /** |
| 98 | + * Returns the longest common string of the two specified strings. |
| 99 | + * |
| 100 | + * @param s one string |
| 101 | + * @param t the other string |
| 102 | + * @return the longest common string that appears as a substring |
| 103 | + * in both <tt>s</tt> and <tt>t</tt>; the empty string |
| 104 | + * if no such string |
| 105 | + */ |
| 106 | + public static String lcs(String s, String t) { |
| 107 | + SuffixArray suffix1 = new SuffixArray(s); |
| 108 | + SuffixArray suffix2 = new SuffixArray(t); |
| 109 | + |
| 110 | + // find longest common substring by "merging" sorted suffixes |
| 111 | + String lcs = ""; |
| 112 | + int i = 0, j = 0; |
| 113 | + while (i < s.length() && j < t.length()) { |
| 114 | + int p = suffix1.index(i); |
| 115 | + int q = suffix2.index(j); |
| 116 | + String x = lcp(s, p, t, q); |
| 117 | + if (x.length() > lcs.length()) lcs = x; |
| 118 | + if (compare(s, p, t, q) < 0) |
| 119 | + i++; |
| 120 | + else j++; |
| 121 | + } |
| 122 | + return lcs; |
| 123 | + } |
| 124 | + |
| 125 | +} |
0 commit comments