I have taken two algorithms, both on this page: the 200_Success one and the rolfl one. I have run both algorithms through two types of tests, and for each type I ran them on data of different sizes, measuring the actual performance.
EDIT2:
The HashSet based implementation has been revised to include hints on th size of the HashSets. The revised performance results are:
RLsparse,113.848,156.549,195.834,264.010,280.561,314.784,332.941,360.607,387.728,401.440
RLfreq,50.688,45.955,61.013,59.733,74.000,68.274,69.208,70.487,76.905,74.185
RLmedium,116.118,177.327,223.468,258.029,275.395,263.365,234.839,225.775,221.798,210.071
2Ssparse,127.956,160.641,159.736,171.221,180.202,195.650,265.159,328.633,270.251,369.847
2Sfreq,154.762,154.865,157.780,157.265,158.705,159.562,160.342,161.276,160.924,163.296
2Smedium,128.366,152.183,187.613,198.995,270.550,198.450,200.526,202.244,205.382,207.368
datasize,512,1024,2048,4096,8192,16384,32768,65536,131072,262144
iterations,8192,4096,2048,1024,512,256,128,64,32,16
And the corresponding plot is (note the scale is the same as in the other plots, max 700 ms):

[plot of the revised results]
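The revised implementation itself is in the answer above; the following is only a minimal sketch of the size-hint idea, with hypothetical class and helper names, not 200_Success's actual code. With the default load factor of 0.75, a HashSet rehashes whenever it grows past three quarters of its capacity, so requesting enough capacity up front avoids all rehashing:

import java.util.HashSet;
import java.util.Set;

public class PresizeSketch {

    // Hypothetical helper: a HashSet big enough to hold 'expected' elements
    // without ever rehashing, given the default load factor of 0.75.
    static Set<Integer> presized(int expected) {
        return new HashSet<>(expected * 4 / 3 + 1);
    }

    public static void main(String[] args) {
        Set<Integer> seen = presized(1 << 18);
        for (int i = 0; i < (1 << 18); i++) {
            seen.add(i); // no resize happens during this loop
        }
        System.out.println(seen.size()); // 262144
    }
}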
Here is the code I used to generate the above graph (the actual algorithms are not included, since they appear above). Note that the iteration count halves each time the data size doubles, so every data point processes the same total amount of input.
package dupcnt;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

public class CompareDups {

    public static void main(String[] args) {
        System.out.println("Building Data");
        int[] sparse = new int[1 << 18];
        int[] freq = new int[sparse.length];
        Random rand = new Random(1); // repeatable 'random' numbers
        for (int i = 0; i < sparse.length; i++) {
            sparse[i] = rand.nextInt(sparse.length); // few duplicates
        }
        for (int i = 0; i < freq.length; i++) {
            freq[i] = rand.nextInt(10); // heavy duplication
        }
        System.out.println("Built data " + sparse.length);

        final int dpoints = 5;
        int[] iterations = new int[dpoints];
        for (int i = 0; i < iterations.length; i++) {
            // halve the iteration count each time the data size doubles
            iterations[i] = 1 << (dpoints + 3 - i);
        }
        // repeat the whole measurement ten times; later passes run JIT-compiled code
        for (int i = 0; i < 10; i++) {
            // rows 0-3 are timings, row 4 the data size, row 5 the iteration count
            long[][] data = new long[6][dpoints];
            for (int dp = 0; dp < dpoints; dp++) {
                int[] input = Arrays.copyOf(sparse, sparse.length >>> (dpoints - 1 - dp));
                data[4][dp] = input.length;
                data[5][dp] = iterations[dp];
                data[0][dp] = processRL(input, iterations[dp]);
                data[2][dp] = process2S(input, iterations[dp]);
                input = Arrays.copyOf(freq, freq.length >>> (dpoints - 1 - dp));
                data[1][dp] = processRL(input, iterations[dp]);
                data[3][dp] = process2S(input, iterations[dp]);
            }
            String[] series = {"RLsparse", "RLfreq", "2Ssparse", "2Sfreq", "datasize", "iterations"};
            for (int s = 0; s < series.length; s++) {
                StringBuilder sb = new StringBuilder();
                sb.append(series[s]);
                for (long l : data[s]) {
                    if (s < 4) {
                        // timing rows: nanoseconds -> milliseconds
                        sb.append(",").append(String.format("%.3f", l / 1000000.0));
                    } else {
                        sb.append(",").append(String.format("%d", l));
                    }
                }
                System.out.println(sb.toString());
            }
        }
    }

    private static long processRL(final int[] input, final int iterations) {
        System.gc(); // reduce the chance of a GC pause inside the timed loop
        int cnt = 0;
        long nanos = System.nanoTime();
        for (int i = 0; i < iterations; i++) {
            cnt += findDuplicates(input).length;
        }
        if (cnt < 0) {
            // uses cnt so the JIT cannot eliminate the timed loop as dead code
            throw new IllegalStateException("This is just to keep things from being optimized out.");
        }
        return System.nanoTime() - nanos;
    }

    private static long process2S(final int[] input, final int iterations) {
        System.gc(); // reduce the chance of a GC pause inside the timed loop
        int cnt = 0;
        long nanos = System.nanoTime();
        for (int i = 0; i < iterations; i++) {
            cnt += findEvenOccurrences(input).size();
        }
        if (cnt < 0) {
            // uses cnt so the JIT cannot eliminate the timed loop as dead code
            throw new IllegalStateException("This is just to keep things from being optimized out.");
        }
        return System.nanoTime() - nanos;
    }
}
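As posted, the harness does not compile on its own, because the two algorithms live in the answers above. Hypothetical stubs with the signatures the harness expects (rolfl's findDuplicates and 200_Success's findEvenOccurrences; the real return types only need to support length and size()) would sit inside CompareDups like this:

    // Placeholder stubs, NOT the real algorithms: paste the implementations
    // from the answers above over these bodies. They exist only so the
    // harness compiles; timings taken with them are meaningless.
    private static int[] findDuplicates(int[] input) {
        return new int[0];
    }

    private static Set<Integer> findEvenOccurrences(int[] input) {
        return new HashSet<>();
    }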
EDIT:
After a request for a 'medium' amount of duplication (the random numbers are taken from a pool of 1024 values instead of just 10):
RLsparse,124.211,147.412,216.475,273.934,293.235,330.671,346.458,372.003,399.730,407.985
RLfreq,42.496,41.446,53.732,58.305,66.281,65.889,64.953,66.078,73.901,71.291
RLmedium,122.813,179.644,218.106,253.908,272.583,259.016,232.671,226.121,220.121,207.180
2Ssparse,155.091,180.669,205.877,236.890,254.193,288.193,351.495,450.381,426.626,665.759
2Sfreq,172.426,173.460,173.988,174.462,175.049,174.477,175.384,172.826,174.230,175.976
2Smedium,162.969,210.496,232.372,235.070,229.555,222.412,219.141,219.358,218.502,221.412
And the corresponding plot is:

[plot of the medium-duplication results]

The harness, extended with the medium data set and ten data points per series, is:
package dupcnt;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

public class CompareDups {

    public static void main(String[] args) {
        System.out.println("Building Data");
        int[] sparse = new int[1 << 18];
        int[] freq = new int[sparse.length];
        int[] medium = new int[sparse.length];
        Random rand = new Random(1); // repeatable 'random' numbers
        for (int i = 0; i < sparse.length; i++) {
            sparse[i] = rand.nextInt(sparse.length); // few duplicates
        }
        for (int i = 0; i < freq.length; i++) {
            freq[i] = rand.nextInt(10); // heavy duplication
        }
        for (int i = 0; i < medium.length; i++) {
            medium[i] = rand.nextInt(1024); // 'medium' duplication
        }
        System.out.println("Built data " + sparse.length);

        final int dpoints = 10;
        int[] iterations = new int[dpoints];
        for (int i = 0; i < iterations.length; i++) {
            // halve the iteration count each time the data size doubles
            iterations[i] = 1 << (dpoints + 3 - i);
        }
        // repeat the whole measurement ten times; later passes run JIT-compiled code
        for (int i = 0; i < 10; i++) {
            // rows 0-5 are timings, row 6 the data size, row 7 the iteration count
            long[][] data = new long[8][dpoints];
            for (int dp = 0; dp < dpoints; dp++) {
                int[] input = Arrays.copyOf(sparse, sparse.length >>> (dpoints - 1 - dp));
                data[0][dp] = processRL(input, iterations[dp]);
                data[3][dp] = process2S(input, iterations[dp]);
                input = Arrays.copyOf(freq, freq.length >>> (dpoints - 1 - dp));
                data[1][dp] = processRL(input, iterations[dp]);
                data[4][dp] = process2S(input, iterations[dp]);
                input = Arrays.copyOf(medium, medium.length >>> (dpoints - 1 - dp));
                data[2][dp] = processRL(input, iterations[dp]);
                data[5][dp] = process2S(input, iterations[dp]);
                data[6][dp] = input.length;
                data[7][dp] = iterations[dp];
            }
            String[] series = {"RLsparse", "RLfreq", "RLmedium", "2Ssparse", "2Sfreq", "2Smedium", "datasize", "iterations"};
            for (int s = 0; s < series.length; s++) {
                StringBuilder sb = new StringBuilder();
                sb.append(series[s]);
                for (long l : data[s]) {
                    if (s < 6) {
                        // timing rows: nanoseconds -> milliseconds
                        sb.append(",").append(String.format("%.3f", l / 1000000.0));
                    } else {
                        sb.append(",").append(String.format("%d", l));
                    }
                }
                System.out.println(sb.toString());
            }
        }
    }

    private static long processRL(final int[] input, final int iterations) {
        System.gc(); // reduce the chance of a GC pause inside the timed loop
        int cnt = 0;
        long nanos = System.nanoTime();
        for (int i = 0; i < iterations; i++) {
            cnt += findDuplicates(input).length;
        }
        if (cnt < 0) {
            // uses cnt so the JIT cannot eliminate the timed loop as dead code
            throw new IllegalStateException("This is just to keep things from being optimized out.");
        }
        return System.nanoTime() - nanos;
    }

    private static long process2S(final int[] input, final int iterations) {
        System.gc(); // reduce the chance of a GC pause inside the timed loop
        int cnt = 0;
        long nanos = System.nanoTime();
        for (int i = 0; i < iterations; i++) {
            cnt += findEvenOccurrences(input).size();
        }
        if (cnt < 0) {
            // uses cnt so the JIT cannot eliminate the timed loop as dead code
            throw new IllegalStateException("This is just to keep things from being optimized out.");
        }
        return System.nanoTime() - nanos;
    }
}
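Assuming the two algorithms (or the stubs sketched earlier) have been pasted in and the file is saved as dupcnt/CompareDups.java, the harness compiles and runs with the stock JDK tools:

javac dupcnt/CompareDups.java
java dupcnt.CompareDups

The outer loop prints the full CSV block ten times; the later passes run with JIT-compiled, warmed-up code.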