5
\$\begingroup\$

I wanted to implement Dijkstra's Algorithm in an Excel VBA Add-In and built it to be used as follows:

  1. Define a list of paths with distances between points. This list needs to contain 3 headings that are used as flags to pick up where the list is. The 3 headings are !dijk:dat:from, !dijk:dat:to and !dijk:dat:dist
  2. Specify from which point to which point you want to go. This is indicated with flags to the left of the cell. The flags are !dijk:get:from and !dijk:get:to
  3. If the list of paths is on a different sheet, specify which sheet it is on by putting the name of the sheet in a cell next to a cell with the text !dijk:dat
  4. Specify where the output should go. This is defined with a flag at the top left of where it should go. The flag is !dijk:steps
  5. Push a button in the Ribbon that triggers Sub sCalcDijkstra() in a module in my Add-In

An example of a dummy sheet I used for testing:

Example Sheet

This is the procedure that does all the work:

Sub sCalcDijkstra()
 'Calculate the shortest path between 2 points
 Dim vError As String
 Dim vRange As Range
 Dim vRangeDat As Range
 Dim vRow As Long
 Dim vRowDatHead As Long
 Dim vRowSteps As Long
 Dim vRowFirst As Long
 Dim vRowCount As Long
 Dim vRowCountDat As Long
 Dim vCol As Long
 Dim vColDatFrom As Long
 Dim vColDatTo As Long
 Dim vColDatDist As Long
 Dim vColSteps As Long
 Dim vColFirst As Long
 Dim vColCount As Long
 Dim vColCountDat As Long
 Dim vCell As String
 Dim vCellFrom As String
 Dim vCellTo As String
 Dim vValDist As Double
 Dim vParFrom As String
 Dim vParTo As String
 Dim vParDat As String
 Dim vDist As Scripting.Dictionary
 Dim vKey As Variant
 Dim vCurNode As String
 Dim vCurDist As Double
 Dim vCurDistTo As Double
 Dim vSteps() As String
 On Error GoTo 0
 vError = ""
 'Check that there is a workbook open
 If ActiveSheet Is Nothing Then vError = "You need to open a workbook in order to do this"
 If vError <> "" Then GoTo ErrorHandler
 'Get the settings from the current sheet
 Set vRange = ActiveSheet.UsedRange
 vRowCount = vRange.Rows.Count
 vColCount = vRange.Columns.Count
 vRowFirst = vRange.Row
 vColFirst = vRange.Column
 vRowSteps = 0
 vColSteps = 0
 vParFrom = ""
 vParTo = ""
 vParDat = ""
 For vRow = 1 To vRowCount
 For vCol = 1 To vColCount
 vCell = ""
 On Error Resume Next
 vCell = Trim(UCase(vRange.Cells(vRow, vCol).Value))
 On Error GoTo 0
 If vCell = "!DIJK:GET:FROM" Then
 vParFrom = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))
 ElseIf vCell = "!DIJK:GET:TO" Then
 vParTo = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))
 ElseIf vCell = "!DIJK:DAT" Then
 vParDat = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))
 ElseIf vCell = "!DIJK:STEPS" Then
 vRowSteps = vRow
 vColSteps = vCol
 End If
 Next
 Next
 If vParFrom = "" Then vError = vError & "Need to specify a Source with the parameter !dijk:get:from" & vbCrLf & vbCrLf
 If vParTo = "" Then vError = vError & "Need to specify a Destination with the parameter !dijk:get:to" & vbCrLf & vbCrLf
 If vRowSteps = 0 Then vError = vError & "Need to designate an area to print the results with the parameter !dijk:steps" & vbCrLf & vbCrLf
 If vError <> "" Then GoTo ErrorHandler
 'Clean up the output area
 vRange.Range(vRange.Cells(vRowSteps + 2 - vRowFirst, vColSteps + 1 - vColFirst).Address, vRange.Cells(vRowCount + vRowFirst - 1, vColSteps + 3 - vColFirst).Address).ClearContents
 'Get the paths from the data sheet
 If vParDat = "" Then
 Set vRangeDat = vRange
 Else
 Set vRangeDat = ActiveWorkbook.Worksheets(vParDat).UsedRange
 End If
 vRowCountDat = vRangeDat.Rows.Count
 vColCountDat = vRangeDat.Columns.Count
 vRowDatHead = 0
 vColDatFrom = 0
 vColDatTo = 0
 vColDatDist = 0
 For vRow = 1 To vRowCountDat
 For vCol = 1 To vColCountDat
 vCell = ""
 On Error Resume Next
 vCell = Trim(UCase(vRangeDat.Cells(vRow, vCol).Value))
 On Error GoTo 0
 If vCell = "!DIJK:DAT:FROM" Then
 vRowDatHead = vRow
 vColDatFrom = vCol
 ElseIf vCell = "!DIJK:DAT:TO" Then
 vRowDatHead = vRow
 vColDatTo = vCol
 ElseIf vCell = "!DIJK:DAT:DIST" Then
 vRowDatHead = vRow
 vColDatDist = vCol
 End If
 Next
 If vRowDatHead > 0 Then Exit For
 Next
 If vColDatFrom = 0 Then vError = vError & "Data sheet is missing !dijk:dat:from column" & vbCrLf & vbCrLf
 If vColDatTo = 0 Then vError = vError & "Data sheet is missing !dijk:dat:to column" & vbCrLf & vbCrLf
 If vColDatDist = 0 Then vError = vError & "Data sheet is missing !dijk:dat:dist column" & vbCrLf & vbCrLf
 If vError <> "" Then GoTo ErrorHandler
 Set vDist = New Scripting.Dictionary
 For vRow = vRowDatHead + 1 To vRowCountDat
 vCellFrom = ""
 vCellTo = ""
 vValDist = -1
 On Error Resume Next
 vCellFrom = Trim(UCase(vRangeDat.Cells(vRow, vColDatFrom).Value))
 vCellTo = Trim(UCase(vRangeDat.Cells(vRow, vColDatTo).Value))
 vValDist = Val(Trim(UCase(vRangeDat.Cells(vRow, vColDatDist).Value)))
 On Error GoTo 0
 If vCellFrom <> "" And vCellTo <> "" And vValDist >= 0 Then
 If Not vDist.Exists(vCellFrom) Then Set vDist.Item(vCellFrom) = New Scripting.Dictionary
 If Not vDist.Exists(vCellTo) Then Set vDist.Item(vCellTo) = New Scripting.Dictionary
 vDist(vCellFrom).Item(vCellTo) = vValDist
 If Not vDist(vCellTo).Exists(vCellFrom) Then vDist(vCellTo).Item(vCellFrom) = vValDist
 End If
 Next
 If Not vDist.Exists(vParFrom) Then vError = vError & "Source " & vParFrom & " not listed in data" & vbCrLf & vbCrLf
 If Not vDist.Exists(vParTo) Then vError = vError & "Destination " & vParTo & " not listed in data" & vbCrLf & vbCrLf
 If vError <> "" Then GoTo ErrorHandler
 'Calculate the shortest path
 For Each vKey In vDist.Keys()
 vDist(vKey).Item("!dist") = -1
 vDist(vKey).Item("!scan") = False
 vDist(vKey).Item("!steps") = ""
 Next
 vDist(vParFrom).Item("!dist") = 0
 vDist(vParFrom).Item("!steps") = vParFrom
 Do While True
 vCurNode = ""
 vCurDist = 0
 For Each vKey In vDist.Keys()
 If vDist(vKey)("!scan") = False Then
 If vDist(vKey)("!dist") >= 0 Then
 If vCurNode = "" Or vCurDist > vDist(vKey)("!dist") Then
 vCurNode = vKey
 vCurDist = vDist(vKey)("!dist")
 End If
 End If
 End If
 Next
 If vCurNode = "" Then Exit Do
 If vCurNode = vParTo Then Exit Do
 vDist(vCurNode).Item("!scan") = True
 For Each vKey In vDist(vCurNode).Keys()
 If Left(vKey, 1) <> "!" And vKey <> vCurNode Then
 vCurDistTo = vCurDist + vDist(vCurNode)(vKey)
 If vDist(vKey)("!dist") < 0 Or vCurDistTo < vDist(vKey)("!dist") Then
 vDist(vKey).Item("!dist") = vCurDistTo
 vDist(vKey).Item("!steps") = vDist(vCurNode)("!steps") & "!" & vKey
 End If
 End If
 Next
 Loop
 'Print the result
 If vDist(vParTo)("!dist") < 0 Then
 vRange.Cells(vRowSteps + 1, vColSteps).Value = "No path found from source to destination"
 Else
 vSteps = Split(vDist(vParTo)("!steps"), "!")
 For vRow = 1 To UBound(vSteps)
 vRange.Cells(vRowSteps + vRow, vColSteps).Value = vSteps(vRow - 1)
 vRange.Cells(vRowSteps + vRow, vColSteps + 1).Value = vSteps(vRow)
 vRange.Cells(vRowSteps + vRow, vColSteps + 2).Value = vDist(vSteps(vRow - 1))(vSteps(vRow))
 Next
 vRange.Cells(vRowSteps + vRow, vColSteps).Value = "Total:"
 vRange.Cells(vRowSteps + vRow, vColSteps + 2).Value = vDist(vParTo)("!dist")
 End If
 'Done
 MsgBox "Done", vbOKOnly + vbInformation, "Path and Distance"
 GoTo Finalize
ErrorHandler:
 Err.Clear
 MsgBox vError, vbOKOnly + vbCritical, "Error"
Finalize:
 Set vDist = Nothing
End Sub

The code works, but I would like some feedback on the following aspects:

  • How can I make this easier and more intuitive for a user? I know I can use named ranges instead of flags, but I would prefer to keep it more visibly obvious what the code is using
  • How can I apply the DRY principle more here. I'm repeating the same patterns all the time, but it seems like the details vary too much for me to just stick something like the nested for loops into a function
  • I use Scripting.Dictionary for almost everything due to it's flexibility and simply due to the fact that I am comfortable with how it works, but I suspect there may be better data structures that I can use which work better for this use case
  • At the heart of this is the Do While True loop which is probably a horribly inefficient way to implement Dijkstra's Algorithm. How can I make it more efficient?
  • Any help/critique is greatly appreciated. I taught myself VBA with the help of Google and I may be doing some bad things I never even realised
asked Jul 19, 2018 at 9:32
\$\endgroup\$

2 Answers 2

4
\$\begingroup\$

That is a humungous procedure you got there. It's all in one piece and it's following a lot of conventions from older VBA code that you should not let become a habit.

The first of these "conventions" is to declare every variable at the top of the block they're scoped to. This is a relic of "ye olden days" where it was important to know up front what things were in a procedure and how you could refer to them. See what that does to the readability of your code on a screen that is not in portrait orientation:

declarations section from the question screenshotted

This is... not really useful, because I can't even remotely tell what variable is needed where and even whether it's needed.

Declare variables as close as possible to their usage.
This has the added benefit of reducing the mental strain when reading the code. You don't need to remember every variable declaration, only those in close proximity to understand the code.

While we're at that section: I noticed that you prefixed every single one of these variables with a v, most likely for "Variable". Don't do that. This adds no useful information to the name of the variable and should frankly speaking be unnecessary.

'Check that there is a workbook open
If ActiveSheet Is Nothing Then vError = "You need to open a workbook in order to do this"
If vError <> "" Then GoTo ErrorHandler

Let's rewrite this a bit. For one the comment is a lie, this doesn't check that a workbook is open, it checks that the ActiveSheet property of the global Application object is not Nothing. In addition you're making this somewhat harder to read by forcing yourself into single-line if-statements:

If ActiveSheet Is Nothing Then
 vError = "You need to open a workbook and select a sheet in order to do this"
 GoTo ErrorHandler
End If

Doing all the work is long and tedious. This code can benefit a lot from extracting subroutines into actual subroutines or Functions. Consider encapsulating blocks of code that have an explanatory comment into their own function:

Sub SolveDijkstra()
 If Not CheckWorkbook() Then Exit Sub
 Dim settings As DijkstraSettings
 Set settings = GetSettingsFromCurrentSheet()
 If Not CheckSettings(settings) Then Exit Sub
 CleanOutputArea()
 Dim graph As Graph
 Set graph = GetPathsFromDataSheet()
 If Not CheckGraph(graph) Then Exit Sub
 Dim path As DijkstraSolution
 Set path = Dijkstra(graph, settings)
 If path.HasSolution Then
 WritePathToOutput(path)
 Else
 WriteErrorToOutput("No path found from source to destination")
 End If
 MsgBox "Done", vbOKOnly + vbInformation, "Path and Distance"
End Sub

This "reimagination" has the clear benefit of allowing us to abstract the tedious separate steps into methods and objects. We don't need to understand the 10 different loops to grasp what this sub does.

Note that this also sidesteps the usage of GoTo which is ... problematic in some contexts.


In closing I want to explicitly call out stuff I noticed as outstanding:

  • You always access a cell's value explicitly through Value :+1:
  • You use a dictionary to keep track of the cost for a given node
  • You reinstate error handling after On Error Resume Next statements and seem to have tried to keep OERN areas as small as possible
  • You validate your input and have a pretty clean way of getting it from your sheet

What's missing to take your VBA coding to the next step is the use of objects and user-defined types as well as noting that you don't need to reuse a block of code to extract it.

answered Jul 19, 2018 at 15:20
\$\endgroup\$
1
  • \$\begingroup\$ Thank you. This is exactly the kind of feedback I was looking for. I agree with all your points. Time to learn about user-defined objects in VBA. I'm not very familiar with objects in VBA, but have used them in other languages \$\endgroup\$ Commented Jul 20, 2018 at 7:36
3
\$\begingroup\$

Question 1

How can I make this easier and more intuitive for a user? I know I can use named ranges instead of flags, but I would prefer to keep it more visibly obvious what the code is using

The first thing that I would do is rename all data headers. I understand that you wanted an unique identifier that "!dijk:dat:from" to me is not at all intuitive to me. After studying Dijkstra's algorithm I came back to determined that it is represents Dijkstra's - data - From nodes (vertices). Although they make perfect sense, I also don't like From and To. I'm not an authority but I think that there is no inherent direction to the connection of two nodes in a graph.

Alternate Column Headers:

  • !dijk:dat:from Node, Node1, Vertex, Vertex1
  • !dijk:dat:to Neighbor, Node2, Vertex2
  • !dijk:dat:dist Distance
  • !dijk:get:from Origin
  • !dijk:get:to Destination
  • !dijk:steps Path

enter image description here

Question 2

How can I apply the DRY principle more here. I'm repeating the same patterns all the time, but it seems like the details vary too much for me to just stick something like the nested for loops into a function

There isn't really any unnecessary repeated code.

↓This loop↓ is repeated twice but the second loop relies on information gathered from the first loop.

For vRow = 1 To vRowCount
 For vCol = 1 To vColCount

Question 3

I use Scripting.Dictionary for almost everything due to it's flexibility and simply due to the fact that I am comfortable with how it works, but I suspect there may be better data structures that I can use which work better for this use case

A custom Node class would have made it easier work out the logic but it is not necessary.

Pseudo Class: GraphNode

Option Explicit
Const Infinity As Long = 2147483647
Private Type Members
 NodeKey As String
 NeighborKey As String
 Distance As Long
End Type
Private m As Members
Public Sub Init(NodeKey As String, Optional NeighborKey As String, Optional Distance As Long = Infinity)
 m.NodeKey = NodeKey
 m.NeighborKey = NeighborKey
 m.Distance = Distance
End Sub
Public Function getKey() As String
Attribute Value.VB_UserMemId = 0
 getKey = m.NodeKey = NodeKey
End Function
Public Function getDistance(Node As GraphNode) As Long
 getDistance = m.Distance
End Function
Public Function UpdateDistance(Node As GraphNode) As Boolean
 If Node.getDistance < m.Distance Then
 UpdateDistance = True
 m.Distance = Node.getDistance
 End If
End Function

I would make some changes in the way that you use the dictionaries.

Using these keys "!dist", "!steps", "!scan" as properties works very well.

vDist(vKey)("!dist")
vDist(vKey).Item("!dist")
vDist(vKey).Item("!steps")
vDist(vKey).Item("!scan")

I would use constants instead. This will not only make the code easier to read but enable you to use intellisense with them.

enter image description here

Item can be omitted because the default member of a Scripting Dictionary is Items().

 vDist(vKey).Item("!dist")
vDist(vKey)("!dist")

Since Items() takes a string as an argument we can use the Bang operator to pass a string key to Items() and return the it's value.

vDist(vKey)![!dist]

The bracket are necessary because !dist is not a valid variable name. Removing ! from the variable names will allow you to do this:

vDist(vKey)!dist

Question 4

At the heart of this is the Do While True loop which is probably a horribly inefficient way to implement Dijkstra's Algorithm. How can I make it more efficient?

Dijkstra's Algorithm requires a Do or a While loop this is unavoidable. Implementing the min-priority queue meationed in the article you linked would make it more efficient but that is out of the scope of a review.

Wikipedia - Dijkstra's algorithm

A min-priority queue is an abstract data type that provides 3 basic operations : add_with_priority(), decrease_priority() and extract_min(). As mentioned earlier, using such a data structure can lead to faster computing times than using a basic queue.

Add Template

Creating a subroutine to add a template worksheet would make the setup easier for both you and the users. Using constants for the Column Headers and working with tables would also simplify things.

Const NodeHeader As String = "Node", NeighborHeader As String = "Neighbor", DistanceHeader As String = "Distance"
Const OriginHeader As String = "Origin", DestinationHeader As String = "Destination"
Const Distance As String = "Distance", Path As String = "Path", Visited As String = "Visited"
Sub AddTemplate()
 Dim TableRange As Range
 With Worksheets.Add
 ' Add Graph NodeList Table
 .Range("B1").Value = "Graph - Node List"
 Set TableRange = .Range("B2").Resize(1, 3)
 TableRange.Value = Array(NodeHeader, NeighborHeader, DistanceHeader)
 .ListObjects.Add xlSrcRange, TableRange, , xlYes
 ' Add Settings Table
 .Range("F1").Value = "Settings"
 Set TableRange = .Range("F2").Resize(1, 2)
 TableRange.Value = Array(OriginHeader, DestinationHeader)
 .ListObjects.Add xlSrcRange, TableRange, , xlYes
 ' Add Results Table
 .Range("F5").Value = "Results"
 Set TableRange = .Range("F6").Resize(1, 3)
 TableRange.Value = Array(OriginHeader, DestinationHeader, DistanceHeader)
 .ListObjects.Add(xlSrcRange, TableRange, , xlYes).ShowTotals = True
 TableRange.Cells(3, 3).Formula = "=SUBTOTAL(109,[Distance])"
 End With
End Sub

enter image description here

answered Jul 24, 2018 at 5:25
\$\endgroup\$
1
  • \$\begingroup\$ For the record: I dislike the suggestion of the bang operator. Other than that, this looks pretty good :) \$\endgroup\$ Commented Sep 4, 2018 at 0:53

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.