SuffixArray.java


Below is the syntax highlighted version of SuffixArray.java from §6.3 Suffix Arrays.


/******************************************************************************
 * Compilation: javac SuffixArray.java
 * Execution: java SuffixArray < input.txt
 * Dependencies: StdIn.java StdOut.java
 * Data files: https://algs4.cs.princeton.edu/63suffix/abra.txt
 *
 * A data type that computes the suffix array of a string.
 *
 * % java SuffixArray < abra.txt
 * i ind lcp rnk select
 * ---------------------------
 * 0 11 - 0 "!"
 * 1 10 0 1 "A!"
 * 2 7 1 2 "ABRA!"
 * 3 0 4 3 "ABRACADABRA!"
 * 4 3 1 4 "ACADABRA!"
 * 5 5 1 5 "ADABRA!"
 * 6 8 0 6 "BRA!"
 * 7 1 3 7 "BRACADABRA!"
 * 8 4 0 8 "CADABRA!"
 * 9 6 0 9 "DABRA!"
 * 10 9 0 10 "RA!"
 * 11 2 2 11 "RACADABRA!"
 *
 * See SuffixArrayX.java for an optimized version that uses 3-way
 * radix quicksort and does not use the nested class Suffix.
 *
 ******************************************************************************/
import java.util.Arrays;
/**
 * The {@code SuffixArray} class represents a suffix array of a string of
 * length <em>n</em>.
 * It supports the <em>selecting</em> the <em>i</em>th smallest suffix,
 * getting the <em>index</em> of the <em>i</em>th smallest suffix,
 * computing the length of the <em>longest common prefix</em> between the
 * <em>i</em>th smallest suffix and the <em>i</em>-1st smallest suffix,
 * and determining the <em>rank</em> of a query string (which is the number
 * of suffixes strictly less than the query string).
 * <p>
 * This implementation uses a nested class {@code Suffix} to represent
 * a suffix of a string (using constant time and space) and
 * {@code Arrays.sort()} to sort the array of suffixes.
 * The <em>index</em> and <em>length</em> operations takes constant time
 * in the worst case. The <em>lcp</em> operation takes time proportional to the
 * length of the longest common prefix.
 * The <em>select</em> operation takes time proportional
 * to the length of the suffix and should be used primarily for debugging.
 * <p>
 * For alternate implementations of the same API, see
 * {@link SuffixArrayX}, which is faster in practice (uses 3-way radix quicksort)
 * and uses less memory (does not create {@code Suffix} objects)
 * and <ahref= "https://algs4.cs.princeton.edu/63suffix/SuffixArrayJava6.java.html">SuffixArrayJava6.java</a>,
 * which relies on the constant-time substring extraction method that existed
 * in Java 6.
 * <p>
 * For additional documentation, see <ahref="https://algs4.cs.princeton.edu/63suffix">Section 6.3</a> of
 * <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
 */
publicclassSuffixArray{
private Suffix[] suffixes;
/**
 * Initializes a suffix array for the given {@code text} string.
 * @param text the input string
 */
publicSuffixArray(String text){
int n = text.length();
this.suffixes =new Suffix[n];
for(int i =0; i < n; i++)
 suffixes[i]=newSuffix(text, i);
 Arrays.sort(suffixes);
}
privatestaticclassSuffiximplements Comparable<Suffix>{
privatefinalString text;
privatefinalint index;
privateSuffix(String text,int index){
this.text = text;
this.index = index;
}
privateintlength(){
return text.length()- index;
}
privatecharcharAt(int i){
return text.charAt(index + i);
}
publicintcompareTo(Suffix that){
if(this== that)return0;// optimization
int n = Math.min(this.length(), that.length());
for(int i =0; i < n; i++){
if(this.charAt(i)< that.charAt(i))return-1;
if(this.charAt(i)> that.charAt(i))return+1;
}
returnthis.length()- that.length();
}
publicStringtoString(){
return text.substring(index);
}
}
/**
 * Returns the length of the input string.
 * @return the length of the input string
 */
publicintlength(){
return suffixes.length;
}
/**
 * Returns the index into the original string of the <em>i</em>th smallest suffix.
 * That is, {@code text.substring(sa.index(i))} is the <em>i</em>th smallest suffix.
 * @param i an integer between 0 and <em>n</em>-1
 * @return the index into the original string of the <em>i</em>th smallest suffix
 * @throws java.lang.IllegalArgumentException unless {@code 0 <= i < n}
 */
publicintindex(int i){
if(i <0|| i >= suffixes.length)thrownewIllegalArgumentException();
return suffixes[i].index;
}
/**
 * Returns the length of the longest common prefix of the <em>i</em>th
 * smallest suffix and the <em>i</em>-1st smallest suffix.
 * @param i an integer between 1 and <em>n</em>-1
 * @return the length of the longest common prefix of the <em>i</em>th
 * smallest suffix and the <em>i</em>-1st smallest suffix.
 * @throws java.lang.IllegalArgumentException unless {@code 1 <= i < n}
 */
publicintlcp(int i){
if(i <1|| i >= suffixes.length)thrownewIllegalArgumentException();
returnlcpSuffix(suffixes[i], suffixes[i-1]);
}
// longest common prefix of s and t
privatestaticintlcpSuffix(Suffix s,Suffix t){
int n = Math.min(s.length(), t.length());
for(int i =0; i < n; i++){
if(s.charAt(i)!= t.charAt(i))return i;
}
return n;
}
/**
 * Returns the <em>i</em>th smallest suffix as a string.
 * @param i the index
 * @return the <em>i</em> smallest suffix as a string
 * @throws java.lang.IllegalArgumentException unless {@code 0 <= i < n}
 */
publicStringselect(int i){
if(i <0|| i >= suffixes.length)thrownewIllegalArgumentException();
return suffixes[i].toString();
}
/**
 * Returns the number of suffixes strictly less than the {@code query} string.
 * We note that {@code rank(select(i))} equals {@code i} for each {@code i}
 * between 0 and <em>n</em>-1.
 * @param query the query string
 * @return the number of suffixes strictly less than {@code query}
 */
publicintrank(String query){
int lo =0, hi = suffixes.length -1;
while(lo <= hi){
int mid = lo +(hi - lo)/2;
int cmp =compare(query, suffixes[mid]);
if(cmp <0) hi = mid -1;
elseif(cmp >0) lo = mid +1;
elsereturn mid;
}
return lo;
}
// compare query string to suffix
privatestaticintcompare(String query,Suffix suffix){
int n = Math.min(query.length(), suffix.length());
for(int i =0; i < n; i++){
if(query.charAt(i)< suffix.charAt(i))return-1;
if(query.charAt(i)> suffix.charAt(i))return+1;
}
return query.length()- suffix.length();
}
/**
 * Unit tests the {@code SuffixArray} data type.
 *
 * @param args the command-line arguments
 */
publicstaticvoidmain(String[] args){
String s = StdIn.readAll().replaceAll("\\s+"," ").trim();
SuffixArray suffix =newSuffixArray(s);
// StdOut.println("rank(" + args[0] + ") = " + suffix.rank(args[0]));
 StdOut.println(" i ind lcp rnk select");
 StdOut.println("---------------------------");
for(int i =0; i < s.length(); i++){
int index = suffix.index(i);
String ith ="\""+ s.substring(index, Math.min(index +50, s.length()))+"\"";
assert s.substring(index).equals(suffix.select(i));
int rank = suffix.rank(s.substring(index));
if(i ==0){
 StdOut.printf("%3d %3d %3s %3d %s\n", i, index,"-", rank, ith);
}
else{
int lcp = suffix.lcp(i);
 StdOut.printf("%3d %3d %3d %3d %s\n", i, index, lcp, rank, ith);
}
}
}
}

AltStyle によって変換されたページ (->オリジナル) /


Copyright © 2000–2019, Robert Sedgewick and Kevin Wayne.
Last updated: Thu Aug 11 09:36:50 EDT 2022.