I wanted to implement Dijkstra's Algorithm in an Excel VBA Add-In and built it to be used as follows:
- Define a list of paths with distances between points. This list needs to contain 3 headings that are used as flags to pick up where the list is. The 3 headings are
!dijk:dat:from
,!dijk:dat:to
and!dijk:dat:dist
- Specify from which point to which point you want to go. This is indicated with flags to the left of the cell. The flags are
!dijk:get:from
and!dijk:get:to
- If the list of paths is on a different sheet, specify which sheet it is on by putting the name of the sheet in a cell next to a cell with the text
!dijk:dat
- Specify where the output should go. This is defined with a flag at the top left of where it should go. The flag is
!dijk:steps
- Push a button in the Ribbon that triggers
Sub sCalcDijkstra()
in a module in my Add-In
An example of a dummy sheet I used for testing:
This is the procedure that does all the work:
Sub sCalcDijkstra()
'Calculate the shortest path between 2 points
Dim vError As String
Dim vRange As Range
Dim vRangeDat As Range
Dim vRow As Long
Dim vRowDatHead As Long
Dim vRowSteps As Long
Dim vRowFirst As Long
Dim vRowCount As Long
Dim vRowCountDat As Long
Dim vCol As Long
Dim vColDatFrom As Long
Dim vColDatTo As Long
Dim vColDatDist As Long
Dim vColSteps As Long
Dim vColFirst As Long
Dim vColCount As Long
Dim vColCountDat As Long
Dim vCell As String
Dim vCellFrom As String
Dim vCellTo As String
Dim vValDist As Double
Dim vParFrom As String
Dim vParTo As String
Dim vParDat As String
Dim vDist As Scripting.Dictionary
Dim vKey As Variant
Dim vCurNode As String
Dim vCurDist As Double
Dim vCurDistTo As Double
Dim vSteps() As String
On Error GoTo 0
vError = ""
'Check that there is a workbook open
If ActiveSheet Is Nothing Then vError = "You need to open a workbook in order to do this"
If vError <> "" Then GoTo ErrorHandler
'Get the settings from the current sheet
Set vRange = ActiveSheet.UsedRange
vRowCount = vRange.Rows.Count
vColCount = vRange.Columns.Count
vRowFirst = vRange.Row
vColFirst = vRange.Column
vRowSteps = 0
vColSteps = 0
vParFrom = ""
vParTo = ""
vParDat = ""
For vRow = 1 To vRowCount
For vCol = 1 To vColCount
vCell = ""
On Error Resume Next
vCell = Trim(UCase(vRange.Cells(vRow, vCol).Value))
On Error GoTo 0
If vCell = "!DIJK:GET:FROM" Then
vParFrom = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))
ElseIf vCell = "!DIJK:GET:TO" Then
vParTo = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))
ElseIf vCell = "!DIJK:DAT" Then
vParDat = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))
ElseIf vCell = "!DIJK:STEPS" Then
vRowSteps = vRow
vColSteps = vCol
End If
Next
Next
If vParFrom = "" Then vError = vError & "Need to specify a Source with the parameter !dijk:get:from" & vbCrLf & vbCrLf
If vParTo = "" Then vError = vError & "Need to specify a Destination with the parameter !dijk:get:to" & vbCrLf & vbCrLf
If vRowSteps = 0 Then vError = vError & "Need to designate an area to print the results with the parameter !dijk:steps" & vbCrLf & vbCrLf
If vError <> "" Then GoTo ErrorHandler
'Clean up the output area
vRange.Range(vRange.Cells(vRowSteps + 2 - vRowFirst, vColSteps + 1 - vColFirst).Address, vRange.Cells(vRowCount + vRowFirst - 1, vColSteps + 3 - vColFirst).Address).ClearContents
'Get the paths from the data sheet
If vParDat = "" Then
Set vRangeDat = vRange
Else
Set vRangeDat = ActiveWorkbook.Worksheets(vParDat).UsedRange
End If
vRowCountDat = vRangeDat.Rows.Count
vColCountDat = vRangeDat.Columns.Count
vRowDatHead = 0
vColDatFrom = 0
vColDatTo = 0
vColDatDist = 0
For vRow = 1 To vRowCountDat
For vCol = 1 To vColCountDat
vCell = ""
On Error Resume Next
vCell = Trim(UCase(vRangeDat.Cells(vRow, vCol).Value))
On Error GoTo 0
If vCell = "!DIJK:DAT:FROM" Then
vRowDatHead = vRow
vColDatFrom = vCol
ElseIf vCell = "!DIJK:DAT:TO" Then
vRowDatHead = vRow
vColDatTo = vCol
ElseIf vCell = "!DIJK:DAT:DIST" Then
vRowDatHead = vRow
vColDatDist = vCol
End If
Next
If vRowDatHead > 0 Then Exit For
Next
If vColDatFrom = 0 Then vError = vError & "Data sheet is missing !dijk:dat:from column" & vbCrLf & vbCrLf
If vColDatTo = 0 Then vError = vError & "Data sheet is missing !dijk:dat:to column" & vbCrLf & vbCrLf
If vColDatDist = 0 Then vError = vError & "Data sheet is missing !dijk:dat:dist column" & vbCrLf & vbCrLf
If vError <> "" Then GoTo ErrorHandler
Set vDist = New Scripting.Dictionary
For vRow = vRowDatHead + 1 To vRowCountDat
vCellFrom = ""
vCellTo = ""
vValDist = -1
On Error Resume Next
vCellFrom = Trim(UCase(vRangeDat.Cells(vRow, vColDatFrom).Value))
vCellTo = Trim(UCase(vRangeDat.Cells(vRow, vColDatTo).Value))
vValDist = Val(Trim(UCase(vRangeDat.Cells(vRow, vColDatDist).Value)))
On Error GoTo 0
If vCellFrom <> "" And vCellTo <> "" And vValDist >= 0 Then
If Not vDist.Exists(vCellFrom) Then Set vDist.Item(vCellFrom) = New Scripting.Dictionary
If Not vDist.Exists(vCellTo) Then Set vDist.Item(vCellTo) = New Scripting.Dictionary
vDist(vCellFrom).Item(vCellTo) = vValDist
If Not vDist(vCellTo).Exists(vCellFrom) Then vDist(vCellTo).Item(vCellFrom) = vValDist
End If
Next
If Not vDist.Exists(vParFrom) Then vError = vError & "Source " & vParFrom & " not listed in data" & vbCrLf & vbCrLf
If Not vDist.Exists(vParTo) Then vError = vError & "Destination " & vParTo & " not listed in data" & vbCrLf & vbCrLf
If vError <> "" Then GoTo ErrorHandler
'Calculate the shortest path
For Each vKey In vDist.Keys()
vDist(vKey).Item("!dist") = -1
vDist(vKey).Item("!scan") = False
vDist(vKey).Item("!steps") = ""
Next
vDist(vParFrom).Item("!dist") = 0
vDist(vParFrom).Item("!steps") = vParFrom
Do While True
vCurNode = ""
vCurDist = 0
For Each vKey In vDist.Keys()
If vDist(vKey)("!scan") = False Then
If vDist(vKey)("!dist") >= 0 Then
If vCurNode = "" Or vCurDist > vDist(vKey)("!dist") Then
vCurNode = vKey
vCurDist = vDist(vKey)("!dist")
End If
End If
End If
Next
If vCurNode = "" Then Exit Do
If vCurNode = vParTo Then Exit Do
vDist(vCurNode).Item("!scan") = True
For Each vKey In vDist(vCurNode).Keys()
If Left(vKey, 1) <> "!" And vKey <> vCurNode Then
vCurDistTo = vCurDist + vDist(vCurNode)(vKey)
If vDist(vKey)("!dist") < 0 Or vCurDistTo < vDist(vKey)("!dist") Then
vDist(vKey).Item("!dist") = vCurDistTo
vDist(vKey).Item("!steps") = vDist(vCurNode)("!steps") & "!" & vKey
End If
End If
Next
Loop
'Print the result
If vDist(vParTo)("!dist") < 0 Then
vRange.Cells(vRowSteps + 1, vColSteps).Value = "No path found from source to destination"
Else
vSteps = Split(vDist(vParTo)("!steps"), "!")
For vRow = 1 To UBound(vSteps)
vRange.Cells(vRowSteps + vRow, vColSteps).Value = vSteps(vRow - 1)
vRange.Cells(vRowSteps + vRow, vColSteps + 1).Value = vSteps(vRow)
vRange.Cells(vRowSteps + vRow, vColSteps + 2).Value = vDist(vSteps(vRow - 1))(vSteps(vRow))
Next
vRange.Cells(vRowSteps + vRow, vColSteps).Value = "Total:"
vRange.Cells(vRowSteps + vRow, vColSteps + 2).Value = vDist(vParTo)("!dist")
End If
'Done
MsgBox "Done", vbOKOnly + vbInformation, "Path and Distance"
GoTo Finalize
ErrorHandler:
Err.Clear
MsgBox vError, vbOKOnly + vbCritical, "Error"
Finalize:
Set vDist = Nothing
End Sub
The code works, but I would like some feedback on the following aspects:
- How can I make this easier and more intuitive for a user? I know I can use named ranges instead of flags, but I would prefer to keep it more visibly obvious what the code is using
- How can I apply the DRY principle more here. I'm repeating the same patterns all the time, but it seems like the details vary too much for me to just stick something like the nested for loops into a function
- I use
Scripting.Dictionary
for almost everything due to it's flexibility and simply due to the fact that I am comfortable with how it works, but I suspect there may be better data structures that I can use which work better for this use case - At the heart of this is the
Do While True
loop which is probably a horribly inefficient way to implement Dijkstra's Algorithm. How can I make it more efficient? - Any help/critique is greatly appreciated. I taught myself VBA with the help of Google and I may be doing some bad things I never even realised
2 Answers 2
That is a humungous procedure you got there. It's all in one piece and it's following a lot of conventions from older VBA code that you should not let become a habit.
The first of these "conventions" is to declare every variable at the top of the block they're scoped to. This is a relic of "ye olden days" where it was important to know up front what things were in a procedure and how you could refer to them. See what that does to the readability of your code on a screen that is not in portrait orientation:
declarations section from the question screenshotted
This is... not really useful, because I can't even remotely tell what variable is needed where and even whether it's needed.
Declare variables as close as possible to their usage.
This has the added benefit of reducing the mental strain when reading the code. You don't need to remember every variable declaration, only those in close proximity to understand the code.
While we're at that section: I noticed that you prefixed every single one of these variables with a v
, most likely for "Variable". Don't do that. This adds no useful information to the name of the variable and should frankly speaking be unnecessary.
'Check that there is a workbook open If ActiveSheet Is Nothing Then vError = "You need to open a workbook in order to do this" If vError <> "" Then GoTo ErrorHandler
Let's rewrite this a bit. For one the comment is a lie, this doesn't check that a workbook is open, it checks that the ActiveSheet property of the global Application object is not Nothing. In addition you're making this somewhat harder to read by forcing yourself into single-line if-statements:
If ActiveSheet Is Nothing Then
vError = "You need to open a workbook and select a sheet in order to do this"
GoTo ErrorHandler
End If
Doing all the work is long and tedious. This code can benefit a lot from extracting subroutines into actual subroutines or Functions. Consider encapsulating blocks of code that have an explanatory comment into their own function:
Sub SolveDijkstra()
If Not CheckWorkbook() Then Exit Sub
Dim settings As DijkstraSettings
Set settings = GetSettingsFromCurrentSheet()
If Not CheckSettings(settings) Then Exit Sub
CleanOutputArea()
Dim graph As Graph
Set graph = GetPathsFromDataSheet()
If Not CheckGraph(graph) Then Exit Sub
Dim path As DijkstraSolution
Set path = Dijkstra(graph, settings)
If path.HasSolution Then
WritePathToOutput(path)
Else
WriteErrorToOutput("No path found from source to destination")
End If
MsgBox "Done", vbOKOnly + vbInformation, "Path and Distance"
End Sub
This "reimagination" has the clear benefit of allowing us to abstract the tedious separate steps into methods and objects. We don't need to understand the 10 different loops to grasp what this sub does.
Note that this also sidesteps the usage of GoTo which is ... problematic in some contexts.
In closing I want to explicitly call out stuff I noticed as outstanding:
- You always access a cell's value explicitly through
Value
:+1: - You use a dictionary to keep track of the cost for a given node
- You reinstate error handling after
On Error Resume Next
statements and seem to have tried to keep OERN areas as small as possible - You validate your input and have a pretty clean way of getting it from your sheet
What's missing to take your VBA coding to the next step is the use of objects and user-defined types as well as noting that you don't need to reuse a block of code to extract it.
-
\$\begingroup\$ Thank you. This is exactly the kind of feedback I was looking for. I agree with all your points. Time to learn about user-defined objects in VBA. I'm not very familiar with objects in VBA, but have used them in other languages \$\endgroup\$neelsg– neelsg2018年07月20日 07:36:51 +00:00Commented Jul 20, 2018 at 7:36
Question 1
How can I make this easier and more intuitive for a user? I know I can use named ranges instead of flags, but I would prefer to keep it more visibly obvious what the code is using
The first thing that I would do is rename all data headers. I understand that you wanted an unique identifier that "!dijk:dat:from" to me is not at all intuitive to me. After studying Dijkstra's algorithm I came back to determined that it is represents Dijkstra's - data - From
nodes (vertices). Although they make perfect sense, I also don't like From
and To
. I'm not an authority but I think that there is no inherent direction to the connection of two nodes in a graph.
Alternate Column Headers:
- !dijk:dat:from Node, Node1, Vertex, Vertex1
- !dijk:dat:to Neighbor, Node2, Vertex2
- !dijk:dat:dist Distance
- !dijk:get:from Origin
- !dijk:get:to Destination
- !dijk:steps Path
Question 2
How can I apply the DRY principle more here. I'm repeating the same patterns all the time, but it seems like the details vary too much for me to just stick something like the nested for loops into a function
There isn't really any unnecessary repeated code.
↓This loop↓ is repeated twice but the second loop relies on information gathered from the first loop.
For vRow = 1 To vRowCount For vCol = 1 To vColCount
Question 3
I use Scripting.Dictionary for almost everything due to it's flexibility and simply due to the fact that I am comfortable with how it works, but I suspect there may be better data structures that I can use which work better for this use case
A custom Node class would have made it easier work out the logic but it is not necessary.
Pseudo Class: GraphNode
Option Explicit
Const Infinity As Long = 2147483647
Private Type Members
NodeKey As String
NeighborKey As String
Distance As Long
End Type
Private m As Members
Public Sub Init(NodeKey As String, Optional NeighborKey As String, Optional Distance As Long = Infinity)
m.NodeKey = NodeKey
m.NeighborKey = NeighborKey
m.Distance = Distance
End Sub
Public Function getKey() As String
Attribute Value.VB_UserMemId = 0
getKey = m.NodeKey = NodeKey
End Function
Public Function getDistance(Node As GraphNode) As Long
getDistance = m.Distance
End Function
Public Function UpdateDistance(Node As GraphNode) As Boolean
If Node.getDistance < m.Distance Then
UpdateDistance = True
m.Distance = Node.getDistance
End If
End Function
I would make some changes in the way that you use the dictionaries.
Using these keys "!dist", "!steps", "!scan" as properties works very well.
vDist(vKey)("!dist") vDist(vKey).Item("!dist") vDist(vKey).Item("!steps") vDist(vKey).Item("!scan")
I would use constants instead. This will not only make the code easier to read but enable you to use intellisense with them.
Item
can be omitted because the default member of a Scripting Dictionary is Items()
.
vDist(vKey).Item("!dist") vDist(vKey)("!dist")
Since Items()
takes a string as an argument we can use the Bang operator to pass a string key to Items()
and return the it's value.
vDist(vKey)![!dist]
The bracket are necessary because !dist
is not a valid variable name. Removing !
from the variable names will allow you to do this:
vDist(vKey)!dist
Question 4
At the heart of this is the Do While True loop which is probably a horribly inefficient way to implement Dijkstra's Algorithm. How can I make it more efficient?
Dijkstra's Algorithm requires a Do
or a While
loop this is unavoidable. Implementing the min-priority queue meationed in the article you linked would make it more efficient but that is out of the scope of a review.
Wikipedia - Dijkstra's algorithm
A min-priority queue is an abstract data type that provides 3 basic operations : add_with_priority(), decrease_priority() and extract_min(). As mentioned earlier, using such a data structure can lead to faster computing times than using a basic queue.
Add Template
Creating a subroutine to add a template worksheet would make the setup easier for both you and the users. Using constants for the Column Headers and working with tables would also simplify things.
Const NodeHeader As String = "Node", NeighborHeader As String = "Neighbor", DistanceHeader As String = "Distance"
Const OriginHeader As String = "Origin", DestinationHeader As String = "Destination"
Const Distance As String = "Distance", Path As String = "Path", Visited As String = "Visited"
Sub AddTemplate()
Dim TableRange As Range
With Worksheets.Add
' Add Graph NodeList Table
.Range("B1").Value = "Graph - Node List"
Set TableRange = .Range("B2").Resize(1, 3)
TableRange.Value = Array(NodeHeader, NeighborHeader, DistanceHeader)
.ListObjects.Add xlSrcRange, TableRange, , xlYes
' Add Settings Table
.Range("F1").Value = "Settings"
Set TableRange = .Range("F2").Resize(1, 2)
TableRange.Value = Array(OriginHeader, DestinationHeader)
.ListObjects.Add xlSrcRange, TableRange, , xlYes
' Add Results Table
.Range("F5").Value = "Results"
Set TableRange = .Range("F6").Resize(1, 3)
TableRange.Value = Array(OriginHeader, DestinationHeader, DistanceHeader)
.ListObjects.Add(xlSrcRange, TableRange, , xlYes).ShowTotals = True
TableRange.Cells(3, 3).Formula = "=SUBTOTAL(109,[Distance])"
End With
End Sub
-
\$\begingroup\$ For the record: I dislike the suggestion of the bang operator. Other than that, this looks pretty good :) \$\endgroup\$Vogel612– Vogel6122018年09月04日 00:53:47 +00:00Commented Sep 4, 2018 at 0:53