I've implemented a binary heap in F#. It's pure and uses zippers for tree modification. To test it out I have implemented heap sort using it but it takes 10 seconds to sort a list of 100 000. Regular List.sort is instant and since heap sort should have the same complexity I'm wondering what I can do to improve my implementation.
Profiling revealed most of the time (above 50%) is spent in the bubble down, which is to be expected since sort is basically just don't a bunch of removes but nothing in the method is really exceptionally slow (that I can see).
#nowarn "25"
namespace FSharpExt
module Heap =
type HeapNode<'a, 'b when 'a : comparison> =
| Full of 'a * 'b * HeapNode<'a, 'b> * HeapNode<'a, 'b>
| Half of 'a * 'b * HeapNode<'a, 'b>
| Leaf of 'a * 'b
| Empty
let (|KeyValue|) zipper =
match zipper with
| Full(k, v, _, _) | Half(k, v, _) | Leaf(k, v) -> (k, v)
| Empty -> failwith "List is empty"
let cut node =
match node with
| Leaf(k, v) -> Empty
| Half(k, v, _) -> Leaf(k, v)
| Full(k, v, left, _) -> Half(k, v, left)
type Direction = Left | Right
type Pointer = Direction list
let rec next pointer =
match pointer with
| [] -> [Left]
| x :: xs when x = Left -> Right :: xs
| x :: xs -> Left :: next xs
let rec previous pointer =
match pointer with
| [Left] -> []
| x :: xs when x = Right -> Left :: xs
| x :: xs -> Right :: previous xs
type Zipper<'a, 'b when 'a : comparison> = Zipper of HeapNode<'a, 'b> * (HeapNode<'a, 'b> * Direction) list
let moveLeftZipper (Zipper((Full(_, _, left, _) | Half(_, _, left)) as node, path)) = Zipper(left, (node, Left) :: path)
let moveRightZipper (Zipper(Full(_, _, _, right) as node, path)) = Zipper(right, (node, Right) :: path)
let moveDirectionZipper direction zipper =
match direction with
| Left -> moveLeftZipper zipper
| Right -> moveRightZipper zipper
let moveAlongPathZipper path zipper = List.fold (Func.flip moveDirectionZipper) zipper (List.rev path)
let moveUpZipper (Zipper(current, (last, dir) :: path)) =
match last, dir with
| Full(k, v, _, right), Left -> Zipper(Full(k, v, current, right), path)
| Half(k, v, _), Left -> Zipper(Half(k, v, current), path)
| Full(k, v, left, _), Right -> Zipper(Full(k, v, left, current), path)
let rec toRootZipper (Zipper(current, path) as zipper) =
match path with
| [] -> zipper
| x :: xs -> zipper |> moveUpZipper |> toRootZipper
let keyValue (Zipper(KeyValue(k, v), _)) = (k, v)
let modifyCurrentZipper (k, v) (Zipper(current, path)) =
match current with
| Full(_, _, left, right) -> Zipper(Full(k, v, left, right), path)
| Half(_, _, left) -> Zipper(Half(k, v, left), path)
| _ -> Zipper(Leaf(k, v), path)
let appendLeafZipper (k, v) (Zipper(current, path)) =
match current with
| Half(ck, cv, left) -> Zipper(Full(ck, cv, left, Leaf(k, v)), path) |> moveRightZipper
| Leaf(ck, cv) -> Zipper(Half(ck, cv, Leaf(k, v)), path) |> moveLeftZipper
| Empty -> Zipper(Leaf(k, v), [])
let removeLeafZipper zipper =
match zipper with
| Zipper(_, (node, dir) :: path) -> Zipper(cut node, path)
| _ -> Zipper(Empty, [])
let rec bubbleUpZipper (Zipper(KeyValue(k, v), path) as zipper) =
match path with
| (KeyValue(pk, pv), _) :: rest when pk > k -> modifyCurrentZipper (pk, pv) zipper |> moveUpZipper |> modifyCurrentZipper (k, v) |> bubbleUpZipper
| _ -> zipper
let rec bubbleDownZipper (Zipper(current, _) as zipper) =
let move fn (ak, av) (bk, bv) = modifyCurrentZipper (bk, bv) zipper |> fn |> modifyCurrentZipper (ak, av) |> bubbleDownZipper
let right = move moveRightZipper
let left = move moveLeftZipper
match current with
| Full(k, v, KeyValue(lk, lv), KeyValue(rk, rv)) when k > lk || k > rk -> if lk > rk then right (k, v) (rk, rv) else left (k, v) (lk, lv)
| Half(k, v, KeyValue(lk, lv)) when k > lk -> left (k, v) (lk, lv)
| _ -> zipper
let toRoot (zipper, pointer) = (toRootZipper zipper, pointer)
let appendLeaf (k, v) (zipper, pointer) = (moveAlongPathZipper (List.tail pointer) zipper |> appendLeafZipper (k, v), next pointer)
let removeLeaf (zipper, pointer) = (moveAlongPathZipper (previous pointer) zipper |> removeLeafZipper, previous pointer)
let bubbleUp (zipper, pointer) = (bubbleUpZipper zipper, pointer)
let bubbleDown (zipper, pointer) = (bubbleDownZipper zipper, pointer)
type Heap<'a, 'b when 'a : comparison> = Heap of Zipper<'a, 'b> * Pointer
let (|Root|) (Heap(Zipper(root, _), _)) = root
let insert (k, v) (Heap(zipper, pointer)) = appendLeaf (k, v) (zipper, pointer) |> bubbleUp |> toRoot |> Heap
let min (Heap(zipper, pointer)) = keyValue zipper
let remove (Heap(zipper, pointer)) =
(modifyCurrentZipper (moveAlongPathZipper (previous pointer) zipper |> keyValue) zipper, pointer)
|> removeLeaf
|> toRoot
|> bubbleDown
|> toRoot
|> Heap
let pop heap = (min heap, remove heap)
let tryPop heap =
match heap with
| Root(Empty) -> None
| _ -> pop heap |> Some
let singleton (k, v) = Heap(Zipper(Leaf(k, v), []), [Left])
let empty = Heap(Zipper(Empty, []), [])
let ofList list =
match list with
| [] -> empty
| first :: tail -> List.fold (fun h x -> insert x h) (singleton first) tail
let ofValues list fn =
match list with
| [] -> empty
| first :: tail -> List.fold (fun h x -> insert (fn x, x) h) (singleton (fn first, first)) tail
let sort list = ofValues list id |> Seq.unfold tryPop |> Seq.map snd |> List.ofSeq
The source is also available on GitHub. Feel free to clone it if you want to profile it or something.
-
\$\begingroup\$ Hi, your GitHub repo is missing some files (PriorityQueue.fs, PriorityQueue.fsi, Heap.fsi) and won't build. \$\endgroup\$mjolka– mjolka2014年07月06日 23:31:33 +00:00Commented Jul 6, 2014 at 23:31
-
\$\begingroup\$ @mjolka Thanks for the warning. I fixed it now. Let me know if it works. \$\endgroup\$Darwin– Darwin2014年07月07日 12:43:39 +00:00Commented Jul 7, 2014 at 12:43
-
\$\begingroup\$ Representing the heap as an array may improve performance considerably by having localised memory access. E.g.: cse.hut.fi/en/research/SVG/TRAKLA2/tutorials/heap_tutorial/… \$\endgroup\$Mau– Mau2014年07月16日 10:28:59 +00:00Commented Jul 16, 2014 at 10:28
-
\$\begingroup\$ @Mau Doesn't really go well with the whole pure functional thing. \$\endgroup\$Darwin– Darwin2014年07月16日 12:02:51 +00:00Commented Jul 16, 2014 at 12:02
-
\$\begingroup\$ @Darwin: totally agree. However the comparison is against an implementation that exploits the locality of arrays. \$\endgroup\$Mau– Mau2014年07月16日 13:35:15 +00:00Commented Jul 16, 2014 at 13:35
1 Answer 1
First of all I'm not surprised that your algorithm is significantly slower than List.sort
:
List.sort
is implemented usingArray.Sort
, which uses introsort. Normal (array-based) heapsort is one of the slower sorting algorithms.- When compared with normal heapsort, you're using a separate object for each item in the collection.
- When compared with mutable tree-based heapsort, you're also creating garbage when "mutating" the tree.
All these put together mean that functional heapsort is never going to be anywhere near List.sort
in terms of performance.
I don't see why you're using zippers here at all. After each operation, the zipper is positioned at the root of the tree, so you might as well use just the binary tree (plus a pointer to the next leaf position). All it does is to make your code more complicated (and probably less efficient).
type HeapNode<'a, 'b when 'a : comparison> =
'a
and 'b
are pretty bad names. All names you use should be meaningful.
| Full of 'a * 'b * HeapNode<'a, 'b> * HeapNode<'a, 'b>
| Half of 'a * 'b * HeapNode<'a, 'b>
| Leaf of 'a * 'b
| Empty
I don't understand why do you have both Empty
and a special case for inner node with one child empty.
let (|KeyValue|) zipper =
This is very confusing, since zipper
is not a Zipper
, it's a HeapNode
.
let rec next pointer =
match pointer with
| [] -> [Left]
| x :: xs when x = Left -> Right :: xs
| x :: xs -> Left :: next xs
I don't see any reason to use when
here, you could write this as:
let rec next pointer =
match pointer with
| [] -> [Left]
| Left :: xs -> Right :: xs
| Right :: xs -> Left :: next xs
Or even (if you're okay with not naming the parameter):
let rec next =
function
| [] -> [Left]
| Left :: xs -> Right :: xs
| Right :: xs -> Left :: next xs
let rec toRootZipper (Zipper(current, path) as zipper) =
match path with
| [] -> zipper
| x :: xs -> zipper |> moveUpZipper |> toRootZipper
You don't need to name variables you're not going to use. Here, you could just write _
instead of x :: xs
.
let ofList list =
match list with
| [] -> empty
| first :: tail -> List.fold (fun h x -> insert x h) (singleton first) tail
You could start folding from empty
instead of singleton first
. That way, you wouldn't even need a special case for the empty list:
let ofList list = List.fold (fun h x -> insert x h) empty list
-
\$\begingroup\$ Hey, thanks for taking the time to review my code. I'll assimilate the changes :D They all make sense. Oh, and I didn't know naming generic parameters was a thing. I thought using a, b, c.. was the convention. \$\endgroup\$Darwin– Darwin2014年07月15日 21:48:20 +00:00Commented Jul 15, 2014 at 21:48
-
\$\begingroup\$ Oh, and let me just add that I'm not using zipper for performance. It's just that the algorithm was easier to express for me when I can use the notion of moving through the tree. \$\endgroup\$Darwin– Darwin2014年07月15日 21:56:04 +00:00Commented Jul 15, 2014 at 21:56
-
\$\begingroup\$ When you have just a single obvious generic parameter, calling it
'T
or'a
is fine. But when there's more than one, or when it's not obvious, you should name them. \$\endgroup\$svick– svick2014年07月15日 22:05:10 +00:00Commented Jul 15, 2014 at 22:05 -
\$\begingroup\$ And the way you talk about moving, it kind of sounds to me like you're thinking imperatively. I think your code would be simpler if you got rid of the zipper (but I could be wrong, I've never actually implemented functional heapsort). \$\endgroup\$svick– svick2014年07月15日 22:09:16 +00:00Commented Jul 15, 2014 at 22:09