I've found my app, which uses the SortedList
, has a poor performance.
So I decided to try to improve the performance of my app.
I created the FastSortedList
class which I think is faster than the original one.
public class FastSortedList<TKey, TValue>
{
IComparer<TKey> _comparer;
List<KeyValuePair<TKey, TValue>> _list = new List<KeyValuePair<TKey, TValue>>();
Dictionary<TKey, TValue> _dict = new Dictionary<TKey, TValue>();
public FastSortedList(IComparer<TKey> comparer)
{
_comparer = comparer ?? Comparer<TKey>.Default;
}
public int Count => _list.Count;
public IEnumerable<TValue> Values => _list.Select(i => i.Value);
public void Add(TKey key, TValue value)
{
if (_dict.ContainsKey(key))
throw new ArgumentException("An entry with the same key already exists.");
int index = BinarySearch(key);
if (index < 0)
{
_list.Insert(~index, new KeyValuePair<TKey, TValue>(key, value));
}
else
{
_list.Insert(index, new KeyValuePair<TKey, TValue>(key, value));
}
_dict[key] = value;
}
public int IndexOfKey(TKey key)
{
if (_dict.ContainsKey(key))
return BinarySearch(key);
return -1;
}
public void RemoveAt(int index)
{
if (index < 0)
return;
var item = _list[index];
_list.RemoveAt(index);
_dict.Remove(item.Key);
}
private int BinarySearch(TKey key)
{
int lo = 0;
int hi = _list.Count - 1;
while (lo <= hi)
{
int i = GetMedian(lo, hi);
int c = _comparer.Compare(_list[i].Key, key);
if (c == 0)
return i;
if (c < 0)
{
lo = i + 1;
}
else
{
hi = i - 1;
}
}
return ~lo;
}
private int GetMedian(int lo, int hi)
{
return lo + ((hi - lo) >> 1);
}
}
The used comparer:
public class TestComparer : IComparer<string>
{
public int Compare(string x, string y)
{
return string.Compare(x, y);
}
}
Comparing with Stopwatch
:
private static void CompareWithStopwatch()
{
int index;
IEnumerable<int> values;
FastSortedList<string, int> fast = new FastSortedList<string, int>(new TestComparer());
Stopwatch sw = new Stopwatch();
sw.Start();
for (int i = 0; i < 1000000; i++)
{
fast.Add($"test{i}", i);
}
sw.Stop();
Console.WriteLine($"FastSortedList: Populate records: elapsed time: {sw.Elapsed.TotalSeconds} sec, total records: {fast.Count}");
sw.Start();
index = fast.IndexOfKey("test745723");
sw.Stop();
Console.WriteLine($"FastSortedList: IndexOfKey: key: 'test745723', index: {index}, elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
fast.RemoveAt(index);
sw.Stop();
Console.WriteLine($"FastSortedList: RemoveAt: key: 'test745723', index: {index} elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
values = fast.Values;
sw.Stop();
Console.WriteLine($"FastSortedList: Get values, elapsed time: {sw.Elapsed.TotalSeconds} sec");
SortedList<string, int> sorted = new SortedList<string, int>(new TestComparer());
sw.Start();
for (int i = 0; i < 1000000; i++)
{
sorted.Add($"test{i}", i);
}
sw.Stop();
Console.WriteLine($"SortedList: Populate records: elapsed time: {sw.Elapsed.TotalSeconds} sec, total records: {sorted.Count}");
sw.Start();
index = sorted.IndexOfKey("test745723");
sw.Stop();
Console.WriteLine($"SortedList: IndexOfKey: key: 'test745723', index: {index}, elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
sorted.RemoveAt(index);
sw.Stop();
Console.WriteLine($"SortedList: RemoveAt: key: 'test745723', index: {index} elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
values = sorted.Values;
sw.Stop();
Console.WriteLine($"SortedList: Get values, elapsed time: {sw.Elapsed.TotalSeconds} sec");
FastSortedList<string, int> fastRevrese = new FastSortedList<string, int>(new TestComparer());
sw.Start();
for (int i = 999999; i >= 0; i--)
{
fastRevrese.Add($"test{i}", i);
}
sw.Stop();
Console.WriteLine($"FastSortedList: fastRevrese - Populate records: elapsed time: {sw.Elapsed.TotalSeconds} sec, total records: {fastRevrese.Count}");
sw.Start();
index = fastRevrese.IndexOfKey("test745723");
sw.Stop();
Console.WriteLine($"FastSortedList: fastRevrese - IndexOfKey: key: 'test745723', index: {index}, elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
fastRevrese.RemoveAt(index);
sw.Stop();
Console.WriteLine($"FastSortedList: fastRevrese - RemoveAt: key: 'test745723', index: {index} elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
values = fastRevrese.Values;
sw.Stop();
Console.WriteLine($"SortedList: fastRevrese - Get values, elapsed time: {sw.Elapsed.TotalSeconds} sec");
SortedList<string, int> sortedRevrese = new SortedList<string, int>(new TestComparer());
sw.Start();
for (int i = 999999; i >= 0; i--)
{
sortedRevrese.Add($"test{i}", i);
}
sw.Stop();
Console.WriteLine($"SortedList: sortedRevrese Populate records: elapsed time: {sw.Elapsed.TotalSeconds} sec, total records: {sortedRevrese.Count}");
sw.Start();
index = sortedRevrese.IndexOfKey("test745723");
sw.Stop();
Console.WriteLine($"SortedList: sortedRevrese - IndexOfKey: key: 'test745723', index: {index}, elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
sortedRevrese.RemoveAt(index);
sw.Stop();
Console.WriteLine($"SortedList: sortedRevrese - RemoveAt: key: 'test745723', index: {index} elapsed time: {sw.Elapsed.TotalSeconds} sec");
sw.Start();
values = sortedRevrese.Values;
sw.Stop();
Console.WriteLine($"SortedList: sortedRevrese - Get values, elapsed time: {sw.Elapsed.TotalSeconds} sec");
}
Results: withStopwatch
Comparing with Benchmark.NET:
public class RaceBenchmark
{
const int LENGTH = 100000;
[Benchmark]
public void InsertToSortedList()
{
SortedList<string, int> sorted = new SortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
sorted.Add($"test{i}", i);
}
}
[Benchmark]
public void InsertToFastSortedList()
{
FastSortedList<string, int> fast = new FastSortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
fast.Add($"test{i}", i);
}
}
[Benchmark]
public void SortedListIndexOfKey()
{
SortedList<string, int> sorted = new SortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
sorted.Add($"test{i}", i);
}
var sortedListIndex = sorted.IndexOfKey("test4321");
}
[Benchmark]
public void FastSortedListIndexOfKey()
{
FastSortedList<string, int> fast = new FastSortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
fast.Add($"test{i}", i);
}
var fastIndex = fast.IndexOfKey("test4321");
}
[Benchmark]
public void SortedListRemoveAt()
{
SortedList<string, int> sorted = new SortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
sorted.Add($"test{i}", i);
}
var sortedListIndex = sorted.IndexOfKey("test4321");
sorted.RemoveAt(sortedListIndex);
}
[Benchmark]
public void FastSortedListRemoveAt()
{
FastSortedList<string, int> fast = new FastSortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
fast.Add($"test{i}", i);
}
var fastIndex = fast.IndexOfKey("test4321");
fast.RemoveAt(fastIndex);
}
[Benchmark]
public void SortedListGetValues()
{
SortedList<string, int> sorted = new SortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
sorted.Add($"test{i}", i);
}
var values = sorted.Values;
}
[Benchmark]
public void FastSortedLisGetValues()
{
FastSortedList<string, int> fast = new FastSortedList<string, int>(new TestComparer());
for (int i = 0; i < LENGTH; i++)
{
fast.Add($"test{i}", i);
}
var values = fast.Values;
}
}
Results:
withBenchmark.NET
I will be glad for getting a code review for this implementation.
1 Answer 1
GetMedian
-- (Please don't call it "median"; though it is the median of the indexes, it is not the median key.)
return lo + ((hi - lo) >> 1);
-->
return (hi + lo) >> 1;
Do you know if your compile will "inline" functions automatically? If it does not, then do the inlining yourself:
int i = GetMedian(lo, hi);
-->
int i = (hi + lo) >> 1;
Consider frequency...
if (c == 0) ...
if (c < 0) ...
else ...
If the number of items is low, it is good to test for 0 first; but I don't think that is the 'typical' case. So, rearrange the 2 tests and the else.
Furthermore, the particular key "test745723" will go down exactly N levels each time. When N is small, your code is "fast". So, I dispute the results of the benchmark runs that involve just one key.
-
\$\begingroup\$ Your first suggestion is poor, because you've taken a well-behaved function and broken it when the sum is too large for the range of
int
. \$\endgroup\$Toby Speight– Toby Speight2021年03月31日 20:46:19 +00:00Commented Mar 31, 2021 at 20:46 -
\$\begingroup\$ @TobySpeight - OK, add an Assert at the beginning somewhere that aborts if the size of the list is more than half of MAX_INT (or whatever). Executing that once will be a lot more performant than repeatedly performing the extra subtract. \$\endgroup\$Rick James– Rick James2021年03月31日 20:55:54 +00:00Commented Mar 31, 2021 at 20:55
-
\$\begingroup\$ Abort? Well, I suppose that's one possible choice... \$\endgroup\$Toby Speight– Toby Speight2021年03月31日 21:02:33 +00:00Commented Mar 31, 2021 at 21:02
-
\$\begingroup\$ If INT is 32 bits, fiddling with a list of more than 2^31 items will take a bit of time; that would move the user into a different level of "performance" issues. Anyway, what happens if he goes over 2^32? And then there is signed vs unsigned. Etc. \$\endgroup\$Rick James– Rick James2021年03月31日 21:05:28 +00:00Commented Mar 31, 2021 at 21:05
_dict = new Dictionary<TKey, TValue>()
&if (_dict.ContainsKey(key)) throw
.) \$\endgroup\$implements IList<T>
norIDictionary<TKey,TValue>
. \$\endgroup\$List
andDictionary
is a bad practice. If you want a sortedDictionary
useSortedDictionary
instead, which would save you the trouble ! \$\endgroup\$