Ramer-Douglas-Peucker is a great algorithm for reducing the number of samples in a trace while preserving its general shape (as much as it can).
I have a trace with 32,000,000 samples, and I want to compute a reduced version so that I can plot it in my WinForms application.
The first time I used this implementation, the execution time was about 3 minutes on the 32M samples; the reduced version had 450K samples with Tolerance = 12, and the shape was amazingly well preserved.
I tried to improve it, and the execution time is now about 14 seconds:
- Getting rid of Math.Pow saved me 121 seconds (see the small before/after sketch right after this list).
- Using unsafe code and pointers: -17 seconds.
- Splitting the work across several threads: -28 seconds (on a Core 2 Duo).
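For reference, the Math.Pow change boils down to squaring by plain multiplication in the distance computation, roughly this before/after (the "before" line is a reconstruction, not the exact original code):

// before (reconstructed): bottom = (float)Math.Sqrt(Math.Pow(X, 2) + Math.Pow(Y, 2));
// after (as in the code below): plain multiplications and a custom approximate Sqrt
X = (firstPoint - lastPoint);
Y = (*(samples + firstPoint) - *(samples + lastPoint));
bottom = Sqrt((X * X) + (Y * Y));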
Could someone tell me whether there is a way to make it even faster?
public class RDPWorker
{
    public int start;
    public double Tolerance;
    public volatile float[] samples;
    public List<int> samplesToKeep;

    public void DoWork()
    {
        int h = samples.Length / 2;
        float[] half = new float[h];
        unsafe
        {
            fixed (float* ptr = samples, ptrH = half)
            {
                float* _half = ptrH;
                for (int i = start; i < h; ++i)
                    *(_half) = *(ptr + i);
            }
        }
        FilterUtils.DouglasPeuckerReduction(half, 0, h - 1, Tolerance, ref samplesToKeep);
    }
}
//[...]
//Piece of my FilterUtils class
public static float[] DouglasPeuckerReduction(float[] points, Double Tolerance)
{
    if (points == null || points.Length < 3)
        return points;

    int firstPoint = 0;
    int lastPoint = 0;
    List<int> pointIndexsToKeep = new List<int>();
    pointIndexsToKeep.Add(firstPoint);
    pointIndexsToKeep.Add(lastPoint);

    int h = points.Length / Environment.ProcessorCount;
    List<RDPWorker> workers = new List<RDPWorker>();
    List<Thread> threads = new List<Thread>();

    int cpu = 0;
    while (cpu < Environment.ProcessorCount)
    {
        RDPWorker _w = new RDPWorker();
        _w.samplesToKeep = new List<int>();
        _w.Tolerance = Tolerance;
        _w.start = h * cpu;
        _w.samples = points;
        workers.Add(_w);
        ++cpu;
    }

    Stopwatch sw = new Stopwatch();
    sw.Start();
    foreach (RDPWorker worker in workers)
    {
        Thread t = new Thread(worker.DoWork);
        t.IsBackground = true;
        threads.Add(t);
        t.Start();
    }
    foreach (Thread thread in threads)
        thread.Join();
    sw.Stop();
    Console.WriteLine("Time = " + sw.ElapsedMilliseconds + " ms");

    threads.Clear();
    threads = null;

    foreach (RDPWorker worker in workers)
        pointIndexsToKeep = pointIndexsToKeep.Concat(worker.samplesToKeep).ToList();
    workers.Clear();
    workers = null;

    int l = pointIndexsToKeep.Count;
    float[] returnPoints = new float[l];
    pointIndexsToKeep.Sort();
    unsafe
    {
        fixed (float* ptr = points, result = returnPoints)
        {
            float* res = result;
            for (int i = 0; i < l; ++i)
                *(res + i) = *(ptr + pointIndexsToKeep[i]);
        }
    }
    pointIndexsToKeep.Clear();
    pointIndexsToKeep = null;

    return returnPoints;
}
internal static void DouglasPeuckerReduction(float[] points, int firstPoint, int lastPoint, Double tolerance, ref List<int> pointIndexsToKeep)
{
    float maxDistance = 0, tmp = 0, area = 0, X = 0, Y = 0, bottom = 0, distance = 0;
    int indexFarthest = 0;
    unsafe
    {
        fixed (float* samples = points)
        {
            for (int i = firstPoint; i < lastPoint; ++i)
            {
                //Perpendicular distance
                tmp = 0.5f * ((lastPoint - i) * (firstPoint - i) + (*(samples + lastPoint) - *(samples + i)) * (*(samples + firstPoint) - *(samples + i)));
                //Abs
                area = tmp < 0 ? -tmp : tmp;
                X = (firstPoint - lastPoint);
                Y = (*(samples + firstPoint) - *(samples + lastPoint));
                bottom = Sqrt((X * X) + (Y * Y));
                distance = area / bottom;
                if (distance > maxDistance)
                {
                    maxDistance = distance;
                    indexFarthest = i;
                }
            }
        }
    }
    if (maxDistance > tolerance && indexFarthest != 0)
    {
        //Add the largest point that exceeds the tolerance
        pointIndexsToKeep.Add(indexFarthest);
        DouglasPeuckerReduction(points, firstPoint, indexFarthest, tolerance, ref pointIndexsToKeep);
        DouglasPeuckerReduction(points, indexFarthest, lastPoint, tolerance, ref pointIndexsToKeep);
    }
}
//http://blog.wouldbetheologian.com/2011/11/fast-approximate-sqrt-method-in-c.html
internal static float Sqrt(float z)
{
    if (z == 0) return 0;
    FloatIntUnion u;
    u.tmp = 0;
    u.f = z;
    u.tmp -= 1 << 23; /* Subtract 2^m. */
    u.tmp >>= 1;      /* Divide by 2. */
    u.tmp += 1 << 29; /* Add ((b + 1) / 2) * 2^m. */
    return u.f;
}
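Note: the FloatIntUnion type used by Sqrt is not included above; judging from the linked blog post, it is presumably the usual explicit-layout float/int union, something like this sketch:

using System.Runtime.InteropServices;

[StructLayout(LayoutKind.Explicit)]
internal struct FloatIntUnion
{
    [FieldOffset(0)]
    public float f;   // the float view of the bits
    [FieldOffset(0)]
    public int tmp;   // the same 32 bits reinterpreted as an int
}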
5 Answers
Note that the algorithm only ever compares distances; it never uses their actual values. This means that you can speed it up significantly by comparing the squares of the distances instead. This works because the sqrt function is monotonic, so sqrt(a) < sqrt(b) if and only if a < b.
This allows you to change your DouglasPeuckerReduction() function to this:
internal static void DouglasPeuckerReduction(float[] points, int firstPoint, int lastPoint, Double toleranceSquared, ref List<int> pointIndexsToKeep)
{
    float maxDistanceSquared = 0, tmp = 0, areaSquared = 0, X = 0, Y = 0, bottomSquared = 0, distanceSquared = 0;
    int indexFarthest = 0;
    unsafe
    {
        fixed (float* samples = points)
        {
            for (int i = firstPoint; i < lastPoint; ++i)
            {
                //Perpendicular distance
                tmp = 0.5f * ((lastPoint - i) * (firstPoint - i) + (*(samples + lastPoint) - *(samples + i)) * (*(samples + firstPoint) - *(samples + i)));
                //Squared area (squaring also removes the sign, so Abs is no longer needed)
                areaSquared = tmp * tmp;
                X = (firstPoint - lastPoint);
                Y = (*(samples + firstPoint) - *(samples + lastPoint));
                bottomSquared = X * X + Y * Y;
                distanceSquared = areaSquared / bottomSquared;
                if (distanceSquared > maxDistanceSquared)
                {
                    maxDistanceSquared = distanceSquared;
                    indexFarthest = i;
                }
            }
        }
    }
    if (maxDistanceSquared > toleranceSquared && indexFarthest != 0)
    {
        //Add the largest point that exceeds the tolerance
        DouglasPeuckerReduction(points, firstPoint, indexFarthest, toleranceSquared, ref pointIndexsToKeep);
        pointIndexsToKeep.Add(indexFarthest);
        DouglasPeuckerReduction(points, indexFarthest, lastPoint, toleranceSquared, ref pointIndexsToKeep);
    }
}
and call it with the tolerance squared; you'll get the same result in almost half the time. Note that your special Sqrt function is no longer used and can now be removed.
Also note that I changed the order in which the pointIndexsToKeep list is populated in the last three lines. This adds the indices in ascending order and lets you remove the sorting step (the pointIndexsToKeep.Sort() call in the caller), which should also save you some time.
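For example, the call in DoWork would then pass the squared tolerance, along these lines (a sketch that simply squares the worker's existing Tolerance field):

FilterUtils.DouglasPeuckerReduction(half, 0, h - 1, Tolerance * Tolerance, ref samplesToKeep);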
You could initialize the capacity of the lists you are generating via the constructor. The closer you can get to the final size of the list after all points have been added, the better.
The reason you would do this is that the list implementation is just a wrapper around an array starting at some size. Every time you add an item to the list it first checks to see if the array has enough space and, if not, it will create a new larger array, copy the contents from the array that is full, and add your new item. It is pretty standard for the size of the array to double each time it is resized. The resizing is expensive, so setting the capacity to something close to the total number of items that it will contain can improve performance.
Also, be sure not to set the capacity too high; you can end up wasting a good bit of memory that way (if that is a concern).
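A minimal sketch of what that could look like here, assuming roughly 450,000 surviving indices (the figure mentioned in the comments below):

// reserve capacity up front so the backing array is not repeatedly reallocated and copied
List<int> pointIndexsToKeep = new List<int>(450000);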
- Hi, thanks for your suggestion. I tried to fix the size of the lists, but I didn't notice any improvement in execution time. RDP is fast with smaller sets of data. Maybe I have to apply another filtering method first, in order to reduce the number of samples a little, I don't know. – D4l3k, Jul 29, 2013 at 9:06
- To do so: I know that with Tolerance = 12, RDP reduces my trace from 32,000,000 to 450,052 samples, which is perfect for me. So I tried to use arrays of that size instead of List<float>, but it "just" saved me ~150 ms :). – D4l3k, Jul 29, 2013 at 12:40
The best approach I can imagine is to implement the DouglasPeuckerReduction function with loops and an explicit stack. Plain recursion, though simple to implement, isn't as efficient in terms of speed (a sketch of an explicit-stack variant follows the comment below).
- Recursion is quite efficient; a poorly implemented stack can be worse than recursion. In the end this is a matter of measurement, although there is also a risk of stack overflow when using recursion. – sergtk, Mar 26, 2014 at 22:09
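For illustration, here is a minimal sketch (not from the thread) of an explicit-stack variant. It reuses the squared-distance comparison from the first answer; segment bounds are pushed onto a stack instead of recursing:

internal static void DouglasPeuckerReductionIterative(float[] points, int firstPoint, int lastPoint,
                                                      double toleranceSquared, List<int> pointIndexsToKeep)
{
    var segments = new Stack<KeyValuePair<int, int>>();
    segments.Push(new KeyValuePair<int, int>(firstPoint, lastPoint));
    while (segments.Count > 0)
    {
        KeyValuePair<int, int> segment = segments.Pop();
        int first = segment.Key;
        int last = segment.Value;

        float maxDistanceSquared = 0;
        int indexFarthest = 0;
        for (int i = first; i < last; ++i)
        {
            // same squared perpendicular-distance expression as the recursive version above
            float tmp = 0.5f * ((last - i) * (first - i) + (points[last] - points[i]) * (points[first] - points[i]));
            float x = first - last;
            float y = points[first] - points[last];
            float distanceSquared = (tmp * tmp) / (x * x + y * y);
            if (distanceSquared > maxDistanceSquared)
            {
                maxDistanceSquared = distanceSquared;
                indexFarthest = i;
            }
        }

        if (maxDistanceSquared > toleranceSquared && indexFarthest != 0)
        {
            // note: unlike the reordered recursive version, indices are not produced in
            // ascending order here, so the caller still needs to sort pointIndexsToKeep
            pointIndexsToKeep.Add(indexFarthest);
            segments.Push(new KeyValuePair<int, int>(first, indexFarthest));
            segments.Push(new KeyValuePair<int, int>(indexFarthest, last));
        }
    }
}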
There may be two mistakes in the algorithm. One is where lastPoint is initialized in the public DouglasPeuckerReduction method: it should be set to the last index (points.Length - 1) rather than 0. The other is in the first line of the DoWork method (int h = samples.Length / 2;). Although the algorithm is much faster than its previous version, its correctness is quite low, especially for spatial data compression.
Use var in local variable declarations when the right-hand side makes the type obvious. This saves you extra typing every time you want to change the type. You should also use var when declaring loop indices.
Single-letter variables, unless used as loop indices, are needlessly confusing for a maintenance programmer. Use descriptive names. There's a reason one of the first steps an obfuscator takes is renaming everything to single-letter variables.
Lastly, you've misspelled pointIndexsToKeep (it should be pointIndicesToKeep).
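A small illustration of these points, using a hypothetical helper just to show the style:

internal static List<int> CollectIndices(int firstPoint, int lastPoint)
{
    // the type is obvious from the right-hand side, so var removes the redundancy
    var pointIndicesToKeep = new List<int>();

    // a descriptive loop variable instead of a bare single-letter name
    for (var sampleIndex = firstPoint; sampleIndex < lastPoint; ++sampleIndex)
        pointIndicesToKeep.Add(sampleIndex);

    return pointIndicesToKeep;
}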
- Parallel.ForEach(workers, w => w.DoWork()); would be just as effective, though it would not improve performance. Also, your Clear calls and null assignments are not needed.