4
\$\begingroup\$

I've implemented a binary heap in F#. It's pure and uses zippers for tree modification. To test it out I have implemented heap sort using it but it takes 10 seconds to sort a list of 100 000. Regular List.sort is instant and since heap sort should have the same complexity I'm wondering what I can do to improve my implementation.

Profiling revealed most of the time (above 50%) is spent in the bubble down, which is to be expected since sort is basically just don't a bunch of removes but nothing in the method is really exceptionally slow (that I can see).

#nowarn "25"
namespace FSharpExt
module Heap =
 type HeapNode<'a, 'b when 'a : comparison> = 
 | Full of 'a * 'b * HeapNode<'a, 'b> * HeapNode<'a, 'b>
 | Half of 'a * 'b * HeapNode<'a, 'b>
 | Leaf of 'a * 'b
 | Empty
 let (|KeyValue|) zipper =
 match zipper with
 | Full(k, v, _, _) | Half(k, v, _) | Leaf(k, v) -> (k, v)
 | Empty -> failwith "List is empty"
 let cut node =
 match node with
 | Leaf(k, v) -> Empty
 | Half(k, v, _) -> Leaf(k, v)
 | Full(k, v, left, _) -> Half(k, v, left)
 type Direction = Left | Right
 type Pointer = Direction list
 let rec next pointer =
 match pointer with
 | [] -> [Left]
 | x :: xs when x = Left -> Right :: xs
 | x :: xs -> Left :: next xs
 let rec previous pointer =
 match pointer with
 | [Left] -> []
 | x :: xs when x = Right -> Left :: xs
 | x :: xs -> Right :: previous xs
 type Zipper<'a, 'b when 'a : comparison> = Zipper of HeapNode<'a, 'b> * (HeapNode<'a, 'b> * Direction) list 
 let moveLeftZipper (Zipper((Full(_, _, left, _) | Half(_, _, left)) as node, path)) = Zipper(left, (node, Left) :: path)
 let moveRightZipper (Zipper(Full(_, _, _, right) as node, path)) = Zipper(right, (node, Right) :: path)
 let moveDirectionZipper direction zipper =
 match direction with
 | Left -> moveLeftZipper zipper
 | Right -> moveRightZipper zipper
 let moveAlongPathZipper path zipper = List.fold (Func.flip moveDirectionZipper) zipper (List.rev path)
 let moveUpZipper (Zipper(current, (last, dir) :: path)) =
 match last, dir with
 | Full(k, v, _, right), Left -> Zipper(Full(k, v, current, right), path)
 | Half(k, v, _), Left -> Zipper(Half(k, v, current), path)
 | Full(k, v, left, _), Right -> Zipper(Full(k, v, left, current), path)
 let rec toRootZipper (Zipper(current, path) as zipper) =
 match path with
 | [] -> zipper
 | x :: xs -> zipper |> moveUpZipper |> toRootZipper 
 let keyValue (Zipper(KeyValue(k, v), _)) = (k, v)
 let modifyCurrentZipper (k, v) (Zipper(current, path)) =
 match current with
 | Full(_, _, left, right) -> Zipper(Full(k, v, left, right), path)
 | Half(_, _, left) -> Zipper(Half(k, v, left), path)
 | _ -> Zipper(Leaf(k, v), path)
 let appendLeafZipper (k, v) (Zipper(current, path)) =
 match current with
 | Half(ck, cv, left) -> Zipper(Full(ck, cv, left, Leaf(k, v)), path) |> moveRightZipper
 | Leaf(ck, cv) -> Zipper(Half(ck, cv, Leaf(k, v)), path) |> moveLeftZipper
 | Empty -> Zipper(Leaf(k, v), [])
 let removeLeafZipper zipper =
 match zipper with
 | Zipper(_, (node, dir) :: path) -> Zipper(cut node, path)
 | _ -> Zipper(Empty, [])
 let rec bubbleUpZipper (Zipper(KeyValue(k, v), path) as zipper) =
 match path with
 | (KeyValue(pk, pv), _) :: rest when pk > k -> modifyCurrentZipper (pk, pv) zipper |> moveUpZipper |> modifyCurrentZipper (k, v) |> bubbleUpZipper
 | _ -> zipper 
 let rec bubbleDownZipper (Zipper(current, _) as zipper) =
 let move fn (ak, av) (bk, bv) = modifyCurrentZipper (bk, bv) zipper |> fn |> modifyCurrentZipper (ak, av) |> bubbleDownZipper
 let right = move moveRightZipper
 let left = move moveLeftZipper
 match current with
 | Full(k, v, KeyValue(lk, lv), KeyValue(rk, rv)) when k > lk || k > rk -> if lk > rk then right (k, v) (rk, rv) else left (k, v) (lk, lv)
 | Half(k, v, KeyValue(lk, lv)) when k > lk -> left (k, v) (lk, lv)
 | _ -> zipper
 let toRoot (zipper, pointer) = (toRootZipper zipper, pointer)
 let appendLeaf (k, v) (zipper, pointer) = (moveAlongPathZipper (List.tail pointer) zipper |> appendLeafZipper (k, v), next pointer)
 let removeLeaf (zipper, pointer) = (moveAlongPathZipper (previous pointer) zipper |> removeLeafZipper, previous pointer)
 let bubbleUp (zipper, pointer) = (bubbleUpZipper zipper, pointer)
 let bubbleDown (zipper, pointer) = (bubbleDownZipper zipper, pointer)
 type Heap<'a, 'b when 'a : comparison> = Heap of Zipper<'a, 'b> * Pointer
 let (|Root|) (Heap(Zipper(root, _), _)) = root
 let insert (k, v) (Heap(zipper, pointer)) = appendLeaf (k, v) (zipper, pointer) |> bubbleUp |> toRoot |> Heap
 let min (Heap(zipper, pointer)) = keyValue zipper
 let remove (Heap(zipper, pointer)) = 
 (modifyCurrentZipper (moveAlongPathZipper (previous pointer) zipper |> keyValue) zipper, pointer) 
 |> removeLeaf 
 |> toRoot 
 |> bubbleDown 
 |> toRoot 
 |> Heap
 let pop heap = (min heap, remove heap)
 let tryPop heap =
 match heap with
 | Root(Empty) -> None
 | _ -> pop heap |> Some
 let singleton (k, v) = Heap(Zipper(Leaf(k, v), []), [Left])
 let empty = Heap(Zipper(Empty, []), [])
 let ofList list = 
 match list with
 | [] -> empty
 | first :: tail -> List.fold (fun h x -> insert x h) (singleton first) tail
 let ofValues list fn =
 match list with
 | [] -> empty
 | first :: tail -> List.fold (fun h x -> insert (fn x, x) h) (singleton (fn first, first)) tail
 let sort list = ofValues list id |> Seq.unfold tryPop |> Seq.map snd |> List.ofSeq

The source is also available on GitHub. Feel free to clone it if you want to profile it or something.

Jamal
35.2k13 gold badges134 silver badges238 bronze badges
asked Jul 6, 2014 at 8:40
\$\endgroup\$
5
  • \$\begingroup\$ Hi, your GitHub repo is missing some files (PriorityQueue.fs, PriorityQueue.fsi, Heap.fsi) and won't build. \$\endgroup\$ Commented Jul 6, 2014 at 23:31
  • \$\begingroup\$ @mjolka Thanks for the warning. I fixed it now. Let me know if it works. \$\endgroup\$ Commented Jul 7, 2014 at 12:43
  • \$\begingroup\$ Representing the heap as an array may improve performance considerably by having localised memory access. E.g.: cse.hut.fi/en/research/SVG/TRAKLA2/tutorials/heap_tutorial/… \$\endgroup\$ Commented Jul 16, 2014 at 10:28
  • \$\begingroup\$ @Mau Doesn't really go well with the whole pure functional thing. \$\endgroup\$ Commented Jul 16, 2014 at 12:02
  • \$\begingroup\$ @Darwin: totally agree. However the comparison is against an implementation that exploits the locality of arrays. \$\endgroup\$ Commented Jul 16, 2014 at 13:35

1 Answer 1

2
\$\begingroup\$

First of all I'm not surprised that your algorithm is significantly slower than List.sort:

  1. List.sort is implemented using Array.Sort, which uses introsort. Normal (array-based) heapsort is one of the slower sorting algorithms.
  2. When compared with normal heapsort, you're using a separate object for each item in the collection.
  3. When compared with mutable tree-based heapsort, you're also creating garbage when "mutating" the tree.

All these put together mean that functional heapsort is never going to be anywhere near List.sort in terms of performance.


I don't see why you're using zippers here at all. After each operation, the zipper is positioned at the root of the tree, so you might as well use just the binary tree (plus a pointer to the next leaf position). All it does is to make your code more complicated (and probably less efficient).


type HeapNode<'a, 'b when 'a : comparison> = 

'a and 'b are pretty bad names. All names you use should be meaningful.


| Full of 'a * 'b * HeapNode<'a, 'b> * HeapNode<'a, 'b>
| Half of 'a * 'b * HeapNode<'a, 'b>
| Leaf of 'a * 'b
| Empty

I don't understand why do you have both Empty and a special case for inner node with one child empty.


let (|KeyValue|) zipper =

This is very confusing, since zipper is not a Zipper, it's a HeapNode.


let rec next pointer =
 match pointer with
 | [] -> [Left]
 | x :: xs when x = Left -> Right :: xs
 | x :: xs -> Left :: next xs

I don't see any reason to use when here, you could write this as:

let rec next pointer =
 match pointer with
 | [] -> [Left]
 | Left :: xs -> Right :: xs
 | Right :: xs -> Left :: next xs

Or even (if you're okay with not naming the parameter):

let rec next =
 function
 | [] -> [Left]
 | Left :: xs -> Right :: xs
 | Right :: xs -> Left :: next xs

let rec toRootZipper (Zipper(current, path) as zipper) =
 match path with
 | [] -> zipper
 | x :: xs -> zipper |> moveUpZipper |> toRootZipper 

You don't need to name variables you're not going to use. Here, you could just write _ instead of x :: xs.


let ofList list = 
 match list with
 | [] -> empty
 | first :: tail -> List.fold (fun h x -> insert x h) (singleton first) tail

You could start folding from empty instead of singleton first. That way, you wouldn't even need a special case for the empty list:

let ofList list = List.fold (fun h x -> insert x h) empty list
answered Jul 15, 2014 at 18:52
\$\endgroup\$
4
  • \$\begingroup\$ Hey, thanks for taking the time to review my code. I'll assimilate the changes :D They all make sense. Oh, and I didn't know naming generic parameters was a thing. I thought using a, b, c.. was the convention. \$\endgroup\$ Commented Jul 15, 2014 at 21:48
  • \$\begingroup\$ Oh, and let me just add that I'm not using zipper for performance. It's just that the algorithm was easier to express for me when I can use the notion of moving through the tree. \$\endgroup\$ Commented Jul 15, 2014 at 21:56
  • \$\begingroup\$ When you have just a single obvious generic parameter, calling it 'T or 'a is fine. But when there's more than one, or when it's not obvious, you should name them. \$\endgroup\$ Commented Jul 15, 2014 at 22:05
  • \$\begingroup\$ And the way you talk about moving, it kind of sounds to me like you're thinking imperatively. I think your code would be simpler if you got rid of the zipper (but I could be wrong, I've never actually implemented functional heapsort). \$\endgroup\$ Commented Jul 15, 2014 at 22:09

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.