I am interested in the performance tradeoffs of linked lists, standard lists and unrolled linked lists. This will depend on various factors including what operation is being performed (e.g. delete, insert, and from where), and the size of the nodes. I've written some performance tests (not listed here).
I am interested in whether the node class could be implemented as an array rather than a list, so I'll be trying this in the future. This class works quite well as a stack, but I am also interested in its use as a queue; in order to try this I might alter the Node class to store pointers for the start and end of the list/array.
Does anyone have any feedback on my unrolled linked list class? I did it just for fun.
/// <summary>
/// An unrolled linked list implements the standard list operations.
/// Its implementation is inbetween that of a standard List and a LinkedList.
/// </summary>
public class UnrolledLinkedList<T> : IList<T>
{
private Node first, last;
private int count;
/// <summary>
/// Create a new (empty) unrolled linked list
/// </summary>
public UnrolledLinkedList()
{
count = 0;
}
/// <summary>
/// Create an unrolled linked list which is populated with data from an enumerable
/// </summary>
public UnrolledLinkedList(IEnumerable<T> enumerable)
: this()
{
if (enumerable.Any())
{
first = new Node();
Node node = first;
foreach(T t in enumerable)
{
node = AddAfterIfFull(node, t);
count++;
}
last = node;
}
}
public int IndexOf(T item)
{
int offset = 0;
foreach (Node node in Nodes)
{
int nodeResult = node.IndexOf(item);
if (nodeResult >= 0)
{
return offset + nodeResult;
}
offset += node.Count;
}
return -1;
}
public void Insert(int index, T item)
{
if (first == null) //if the list is empty, then thats only ok if index=0
{
if (index == 0)
{
Initialise(item);
}
else
{
throw new IndexOutOfRangeException("The list is empty, so you can only insert at index 0");
}
}
else
{
if (index == count) //adding right at the end of the list
{
AddAfterIfFull(last, item);
}
else
{
FindResult findResult = Find(index);
if (findResult.node.IsFull)
{
T t = findResult.node.RemoveLast();
AddAfter(findResult.node, t);
}
findResult.node.Insert(index - findResult.offset, item);
}
}
count++;
}
public T this[int index]
{
get
{
FindResult findResult = Find(index);
return findResult.node[index - findResult.offset];
}
set
{
FindResult findResult = Find(index);
findResult.node[index - findResult.offset] = value;
return;
}
}
public void Add(T item)
{
if (first == null)
{
Initialise(item);
}
else
{
AddAfterIfFull(last, item);
}
count++;
}
public void Clear()
{
first = null;
count = 0;
}
public bool Contains(T item)
{
return Nodes.Any(x => x.Contains(item));
}
public void CopyTo(T[] array, int arrayIndex)
{
int i = arrayIndex;
foreach (T t in this)
{
array[i] = t;
i++;
}
}
public int Count
{
get
{
return count;
}
}
public bool IsReadOnly
{
get { return false; }
}
public bool Remove(T item)
{
foreach (Node node in Nodes)
{
int i = node.IndexOf(item);
if (i >= 0)
{
if (node.Count == 1)
{
DeleteNode(node);
}
else
{
node.RemoveAt(i);
}
count--;
return true;
}
}
return false;
}
public void RemoveAt(int index)
{
FindResult findResult = Find(index);
findResult.node.RemoveAt(index - findResult.offset);
if (!findResult.node.Any())
{
DeleteNode(findResult.node);
}
count--;
return;
}
public IEnumerator<T> GetEnumerator()
{
foreach (Node node in Nodes)
{
foreach (T t in node)
{
yield return t;
}
}
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
/// <summary>
/// Initialise this list so it contains only one item.
/// DOES NOT increase the count
/// </summary>
private void Initialise(T firstItem)
{
first = new Node();
first.Add(firstItem);
last = first;
}
/// <summary>
/// Gets an enumerator over all the nodes
/// </summary>
private IEnumerable<Node> Nodes
{
get
{
Node node = first;
while (node != null)
{
yield return node;
node = node.next;
}
}
}
/// <summary>
/// If the node supplied is full then a new one is created afterwards;
/// the item is added
/// </summary>
/// <returns>If the node is full then the new node; otherwise just the node supplied</returns>
private Node AddAfterIfFull(Node node, T item)
{
if (node.IsFull)
{
return AddAfter(node, item);
}
else
{
node.Add(item);
return node;
}
}
/// <summary>
/// Adds a new node after the node supplied, and populates it with the item supplied.
/// DOES NOT increase the count
/// </summary>
private Node AddAfter(Node node, T item)
{
Node newNode = new Node();
newNode.Add(item);
newNode.next = node.next;
newNode.previous = node;
if (node.next == null)
{
last = newNode;
}
else
{
node.next.previous = newNode;
}
node.next = newNode;
return newNode;
}
/// <summary>
/// Removes a node from the list
/// </summary>
private void DeleteNode(Node node)
{
if (node.next == null)
{
last = node.previous;
}
if (node.previous != null)
{
node.previous.next = node.next;
}
if (node.next != null)
{
node.next.previous = node.previous;
}
}
/// <summary>
/// Finds the item with this index,
/// and the index of the first item within that node
/// </summary>
private FindResult Find(int index)
{
int offset = 0;
foreach (Node node in Nodes)
{
int nextOffset = offset + node.Count;
if (index >= offset && index < nextOffset) //found node
{
return new FindResult(node, offset);
}
offset = nextOffset;
}
throw new IndexOutOfRangeException("No item at that index!");
}
/// <summary>
/// Stores the two values returned by Find
/// </summary>
private class FindResult
{
public readonly Node node;
public readonly int offset;
public FindResult(Node node, int offset)
{
this.node = node;
this.offset = offset;
}
}
////////////////////// NODE CLASS //////////////////////
private class Node : List<T>
{
internal const int MaxSize = 100;
public Node next, previous;
public bool IsFull
{
get
{
return Count >= MaxSize;
}
}
public T RemoveLast()
{
int i = Count - 1;
T t = this[i];
RemoveAt(i);
return t;
}
}
}
Performance
Here are some performance statistics. It's not scientific but it gives you a rough idea. Times are in milliseconds.
- Adding 100000 integers (list): 1
Adding (unrolled linked list): 12
Finding the index of an integer at the end of 1000 integers (list): 10886
Finding (unrolled linked list): 18055
Inserting 100000 integers into the middle (list): 22694
Insertion(unrolled linked list): 2238
Deletion of 1000 items from the start (list): 2331
- Deletion (unrolled linked list): 1
It looks like an unrolled linked list could be a good choice for a long list where there is a lot of insertion or deletion at places other than the end. It will have a lower memory footprint than a linked list (I've not tested that out though).
3 Answers 3
A few things off the bat:
- You should consider prefixing private field names with underscores, so it is possible to distinguish them from local variables.
Edit: There is no "official" naming conventions regarding private members. Not that i know of anyway. Underscore however is a somewhat accepted industrial standart nowadays. Mainly, because a) it is used by Microsoft, b) it is used by ReSharper (by default), c) better intelli-sense support, d) when it comes to choosing between
this.name = name;
and_name = name;
most people choose the latter. I think this is pretty accurate IEnumerator<T> enumerator = enumerable.GetEnumerator(); while (enumerator.MoveNext())
emm, why dont you use
foreach
?tuple.Item1[index - tuple.Item2] = value;
- really hard to read.public void Clear() { first = null; count = 0; }
waht happens to
last
reference?internal
members inprivate
class do not make much sense (consider making thosepublic
).Your
Node
class should probably extendList<T>
and not encapsulate it. This way you can drop all those proxy methods and therefore reduce the amount of code. Edit2: i think using arrays inNode
class is an overkill. You'll end up re-writingList
implementation. Resizing is the only noticable performance drawback ofList
but that is not going to happen in your case (since you specify the size in constructor). The rest is unlikely to become any kind of bottleneck, so you probably want to keep it simple.internal const int MaxSize = 100;
this should probably be a parameter rather than a constant (at least as a user i would like to be able to set up the size of a node)
-
\$\begingroup\$ Thanks for the great feedback. Personally I don't like underscores, is there a standard somewhere that supports their use? The parameter thing is important for my performance testing. The reason why I didn't make Node extend List<T> is because I wanted to test using an array to hold the values instead of a list. \$\endgroup\$Paul Richards– Paul Richards2014年02月12日 07:59:26 +00:00Commented Feb 12, 2014 at 7:59
-
\$\begingroup\$ @PaulRichards, check my edit regarding underscores \$\endgroup\$Nikita B– Nikita B2014年02月12日 09:13:51 +00:00Commented Feb 12, 2014 at 9:13
-
1\$\begingroup\$
internal members in private class do not make much sense
If they (e.g. the Node constructor) were a private member instead of internal, then the containing class (e.g. UnrolledLinkedList) wouldn't be able to invoke it. \$\endgroup\$ChrisW– ChrisW2014年02月12日 10:12:31 +00:00Commented Feb 12, 2014 at 10:12 -
\$\begingroup\$ @NikitaBrizhak
most people choose the latter [citation needed]
- that is not, IMO, why it is chosen; it is chosen only for CLS compliance. Also, in the constructor you could set the property directly and not the backing field, avoiding the "oh-so-problematic" extra 5 charactersthis.
. \$\endgroup\$ANeves– ANeves2014年02月12日 11:00:12 +00:00Commented Feb 12, 2014 at 11:00 -
1\$\begingroup\$ The link you referred to says, "Microsoft recommends against the m_ (and the straight _) even though they did both in their code" and "If I want my class to be fully CLS-compliant, I could leave off the prefix on any C# protected member variables." So using an underscore prefix seems to be controversial advice, at best. \$\endgroup\$ChrisW– ChrisW2014年02月12日 11:53:02 +00:00Commented Feb 12, 2014 at 11:53
Few things (mostly small things that could slightly improve performance):
public UnrolledLinkedList(IEnumerable<T> enumerable)
- do not callenumerable.Any()
- instead start enumerating it and check on the first iteration if there is any data. This is for the performance nitpicker.- The same constructor - you should dispose the enumerator if it is disposable (that is what
foreach
would do automatically). - Do not use
Tuple<,>
- instead create your ownstruct
that has two fields (not properties). Named fields will make the code a bit more readable and accessing fields will be faster than properties (it is only used in private members which is why it can be safely done). Insert()
checksfirst==null
whileAdd()
checkslast == null
. Should be consistent.- Since
.IsReadOnly
always returnfalse
you might hide it by implementing it explicitly. RemoveAt()
- use the same approach withnode.Count==1
as inRemove()
. Should be slightly faster.ToString()
- do not print the whole contents of the list since it rarely will give the expected result. Since this is most likely done to enhance debugging experience, use debugging attributesFind()
- you are doingoffset + node.Count
twice - instead useoffset = nextOffset;
.Node(List<T>)
constructor will result in the internal list being created and then promptly removed.- Since
Node
class is private, do not use properties forNext
andPrevious
- instead use fields.
There also could be a benefit of (maybe a parameter that controls it) not destroying the last Node
that you remove - instead hold it in another variable. When you need to create a new node first check if you have one stored there. You might save a lot of operations on memory allocation if the list is used in certain way. You might even store all previously created nodes in a pool. This would be consistent with the behavior of other .NET collection classes that do not decrease their allocated capacity automatically instead always remaining at their peak capacity.
-
\$\begingroup\$ Thanks, I created the FindResult class in place of the Tuple, and implemented some more of your suggested changes also. \$\endgroup\$Paul Richards– Paul Richards2014年02月14日 19:50:34 +00:00Commented Feb 14, 2014 at 19:50
This part is used twice, don't duplicate the code but instead create a common method and call it from both places.
if (last.IsFull)
{
AddItemAfterInNewNode(last, item);
}
else
{
last.Add(item);
}
You could also use LINQ here.
public bool Contains(T item)
{
return Nodes.Any(x => x.Contains(item));
}
And I also would recommend you to inherit from List, it would save you a lot of code.
-
\$\begingroup\$ -1 I cannot endorse subclassing
List<T>
. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2014年02月14日 16:49:19 +00:00Commented Feb 14, 2014 at 16:49 -
\$\begingroup\$ what's wrong about extending from List<>? \$\endgroup\$Yumei De Armas– Yumei De Armas2014年02月14日 19:31:15 +00:00Commented Feb 14, 2014 at 19:31
-
\$\begingroup\$ See Why not inherit from
List<T>
? - although this particular case isn't really a case for composition, even implementingIList<T>
wouldn't be the recommended way, sinceICollection<T>
has all the needed members and is more generic. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2014年02月14日 19:45:11 +00:00Commented Feb 14, 2014 at 19:45 -
2\$\begingroup\$ Thanks, I used the common method and LINQ. Also inheriting from List<T> saved a LOT of code, I think it makes it much simpler. \$\endgroup\$Paul Richards– Paul Richards2014年02月14日 19:49:00 +00:00Commented Feb 14, 2014 at 19:49
Explore related questions
See similar questions with these tags.
List<T>
that will hold the first (or the last) item of each Node used byUnrolledLinkedList<T>
? That way when you want to find the index of an element you just need to perform a binary search in that list (which is always sorted) to know in which Node that element is. If you have a lot of nodes, it will pay. when you think about it, it is somehow similar to aB+Tree
but with only one level below root. \$\endgroup\$