Python: Dijkstra's Algorithm

Question 1

In this code, I focused more on code readability rather than algorithmic complexity. I didn't really care about the code in the main function because it was just used to test the code. Some of the classes have a D prefix to differentiate them from the regular Nodes.

A few problems in particular I would like comments on:

Storing the name of the node both in the dictionary key and the name field seems wrong. I would think it has some of the same problems as parallel lists.
The DGraph class's only functionality is to be a wrapper for a list. On the otherhand, I like it because it is more verbose.
Should connections stay as (node, cost) tuples or should I make a new DConnection class?
I'm hoping to avoid external libraries

class DNode:
 def __init__(self, name):
 self.connections = []
 self.name = name
 self.total_cost = -1
 self.last_connection = None
 self.visited = False
 def add_connection(self, node, cost):
 self.connections.append((node, cost))
 for connection in node.connections:
 if connection[0] is self:
 return
 node.add_connection(self, cost)
class DGraph:
 def __init__(self):
 self.nodes = []
 def new_node(self, name):
 node = DNode(name)
 self.nodes.append(node)
 return node
class Dijkstra:
 def __init__(self, graph, start_node, end_node):
 self.graph = graph
 self.start_node = start_node
 self.end_node = end_node
 self.dlist = DList(graph.nodes, start_node)
 def calculate_shortest_path(self):
 self.recursive_helper(self.start_node)
 node = self.end_node
 node_steps = []
 while node != self.start_node:
 node_steps.append(node)
 node = node.last_connection
 node_steps.append(self.start_node)
 return reversed(node_steps)
 def recursive_helper(self, current_node):
 current_cost = current_node.total_cost
 for connection in current_node.connections:
 neighbor = connection[0]
 if not neighbor.visited:
 total_cost = current_cost + connection[1]
 if neighbor.total_cost == -1 or total_cost < neighbor.total_cost:
 neighbor.total_cost = total_cost
 neighbor.last_connection = current_node
 current_node.visited = True
 if self.end_node.visited:
 return
 self.recursive_helper(self.dlist.get_smallest_unvisited_dnode())
class DList:
 def __init__(self, node_list, start_node):
 self.nodes = list(node_list)
 start_node.total_cost = 0
 def get_smallest_unvisited_dnode(self):
 smallest = None
 for node in self.nodes:
 if not node.visited and node.total_cost != -1:
 if smallest is None or node.total_cost < smallest.total_cost:
 smallest = node
 return smallest
class GraphParser:
 def __init__(self, unparsed_string):
 self.unparsed_string = unparsed_string
 self.nodes = {}
 self.graph = DGraph()
 def create_nodes(self):
 for line in self.unparsed_string.split("\n"):
 if line[0:5] == "node ":
 node_name = line[5:]
 node = self.graph.new_node(node_name)
 self.nodes[node_name] = node
 def create_connections(self):
 for line in self.unparsed_string.split("\n"):
 if line[0:5] == "node ":
 node_name = line[5:]
 node = self.nodes[node_name]
 elif line[0] == "\t":
 connection_data = line[1:].split(",")
 neighbor_name = connection_data[0].strip()
 if len(connection_data) > 0:
 cost = int(connection_data[1].strip())
 else:
 cost = 1
 neighbor = self.nodes[neighbor_name]
 node.add_connection(neighbor, cost)
 def get_graph(self):
 self.create_nodes()
 self.create_connections()
 return self.graph
def main():
 filename = "graph.txt"
 f = open(filename, "r")
 content = f.read()
 f.close()
 gp = GraphParser(content)
 graph = gp.get_graph()
 start_node = gp.nodes["S"]
 end_node = gp.nodes["E"]
 d = Dijkstra(graph, start_node, end_node)
 path = d.calculate_shortest_path()
 for i in path:
 print(i.name)
main()

Sample graph.txt

node A
 D, 4
 B, 3
 S, 7
node B
 S, 2
 A, 3
 D, 4
 H, 1
node C
 S, 3
 L, 2
node D
 A, 4
 B, 4
 F, 5
node E
 K, 5
 G, 2
node F
 H, 3
 D, 5
node G
 H, 2
 E, 2
node H
 B, 1
 F, 3
 G, 2
node I
 L, 4
 J, 6
 K, 4
node J
 L, 4
 I, 6
 K, 4
node K
 I, 4
 K, 4
node L
 I, 4
 J, 4
node S
 A, 7
 B, 2
 C, 3

Question 2

I think you are using classes where you don't need to. Your DGraph class is one (as you have already half-realized) but your GraphParser is another. You even have to parse the file content twice because of your separation of concerns. In my opinion it would be way easier if this was a simple function taking an iterable of lines (such as an open file) and creating a dict of Nodes from this (mapping the name to the node):

class Node:
 def __init__(self, name):
 self.connections = set()
 self.name = name
 self.total_cost = -1
 self.last_connection = None
 self.visited = False
 def __str__(self):
 return f"Name: {self.name}, total cost: {self.total_cost}, connections: {[(node.name, cost) for node, cost in self.connections]}"
 __repr__ = __str__
def parse_graph(lines):
 graph = {}
 for line in lines:
 if line.startswith("node "):
 name = line[5:]
 # Get the node if it already exists, otherwise create a new node
 node = graph.setdefault(name, Node(name))
 elif line.startswith(("\t", " "*4)): # some editors might replace \t with 4 spaces, careful!
 name2, cost = line.strip().split(",")
 # Get the other node if it already exists, otherwise create a new node
 node2 = graph.setdefault(name2, Node(name2))
 # add connection manually, no need to iterate over all existing connections
 # reverse connection added in definition of node2
 node.connections.add((node2, int(cost))) # int can deal with whitespace
 else:
 raise ValueError(f"Cannot parse line: {line}")
 return graph

The crucial part here is adding a node when you see it the first time (even just as a target of a connection) and getting the same node back again when it already exists. For this I used dict.setdefault, which is functionally equivalent to:

if name not in graph:
 graph[name] = Node(name)
node = graph[name]

Currently there is no sanity check that all nodes that are referenced as targets of connections are actually defined, you might want to add that.

It also uses the fact that Node.connections is a set, so adding an already exisitng conenction does not hurt, because the tuple (Node, cost) is hashable (using the id of `Node).

For your example file this returns this graph structure:

# {'A': Name: A, total cost: -1, connections: [('S', 7), ('D', 4), ('B', 3)],
# 'B': Name: B, total cost: -1, connections: [('H', 1), ('D', 4), ('S', 2), ('A', 3)],
# 'C': Name: C, total cost: -1, connections: [('L', 2), ('S', 3)],
# 'D': Name: D, total cost: -1, connections: [('F', 5), ('A', 4), ('B', 4)],
# 'E': Name: E, total cost: -1, connections: [('K', 5), ('G', 2)],
# 'F': Name: F, total cost: -1, connections: [('D', 5), ('H', 3)],
# 'G': Name: G, total cost: -1, connections: [('E', 2), ('H', 2)],
# 'H': Name: H, total cost: -1, connections: [('F', 3), ('B', 1), ('G', 2)],
# 'I': Name: I, total cost: -1, connections: [('L', 4), ('K', 4), ('J', 6)],
# 'J': Name: J, total cost: -1, connections: [('I', 6), ('L', 4), ('K', 4)],
# 'K': Name: K, total cost: -1, connections: [('K', 4), ('J', 4), ('E', 5), ('I', 4)],
# 'L': Name: L, total cost: -1, connections: [('J', 4), ('C', 2), ('I', 4)],
# 'S': Name: S, total cost: -1, connections: [('C', 3), ('A', 7), ('B', 2)]}

You will probably have to slightly adapt your Dijkstra class for this, but it should not make it any harder.

For the file opening and closing, you should use with and a if __name__ == "__main__": guard:

def main():
 with open("graph.txt") as file:
 graph = parse_graph(file)
 start_node = graph["S"]
 end_node = graph["E"]
 d = Dijkstra(graph, start_node, end_node)
 path = d.calculate_shortest_path()
 for i in path:
 print(i.name)
if __name__ == "__main__":
 main()

If you have control over the file format, you can take this assuming a node exists if it is mentioned to an extreme to cut down on the number of lines needed. Your graph.txt could then be (assuming all connections are still bi-directional and cost symmetric):

A, D, 4
A, B, 3
A, S, 7
B, S, 2
B, D, 4
B, H, 1
C, S, 3
C, L, 2
D, F, 5
E, G, 2
E, K, 5
F, H, 3
G, H, 2
I, L, 4
I, K, 4
I, J, 6
J, L, 4
J, K, 4

Question 3

Thanks for the review. It's strange using Python after spending the school year using Java. I kinda miss explicitly specifying the types. The purpose of DGraph was to specify that it was a list of DNodes. I suppose I'll have to readjust to the Python ways.

Graipher Graipher 41.7k7 gold badges70 silver badges134 bronze badges · Accepted Answer · 2018-07-19 09:30:04Z

I think you are using classes where you don't need to. Your DGraph class is one (as you have already half-realized) but your GraphParser is another. You even have to parse the file content twice because of your separation of concerns. In my opinion it would be way easier if this was a simple function taking an iterable of lines (such as an open file) and creating a dict of Nodes from this (mapping the name to the node):

class Node:
 def __init__(self, name):
 self.connections = set()
 self.name = name
 self.total_cost = -1
 self.last_connection = None
 self.visited = False
 def __str__(self):
 return f"Name: {self.name}, total cost: {self.total_cost}, connections: {[(node.name, cost) for node, cost in self.connections]}"
 __repr__ = __str__
def parse_graph(lines):
 graph = {}
 for line in lines:
 if line.startswith("node "):
 name = line[5:]
 # Get the node if it already exists, otherwise create a new node
 node = graph.setdefault(name, Node(name))
 elif line.startswith(("\t", " "*4)): # some editors might replace \t with 4 spaces, careful!
 name2, cost = line.strip().split(",")
 # Get the other node if it already exists, otherwise create a new node
 node2 = graph.setdefault(name2, Node(name2))
 # add connection manually, no need to iterate over all existing connections
 # reverse connection added in definition of node2
 node.connections.add((node2, int(cost))) # int can deal with whitespace
 else:
 raise ValueError(f"Cannot parse line: {line}")
 return graph

The crucial part here is adding a node when you see it the first time (even just as a target of a connection) and getting the same node back again when it already exists. For this I used dict.setdefault, which is functionally equivalent to:

if name not in graph:
 graph[name] = Node(name)
node = graph[name]

Currently there is no sanity check that all nodes that are referenced as targets of connections are actually defined, you might want to add that.

It also uses the fact that Node.connections is a set, so adding an already exisitng conenction does not hurt, because the tuple (Node, cost) is hashable (using the id of `Node).

For your example file this returns this graph structure:

# {'A': Name: A, total cost: -1, connections: [('S', 7), ('D', 4), ('B', 3)],
# 'B': Name: B, total cost: -1, connections: [('H', 1), ('D', 4), ('S', 2), ('A', 3)],
# 'C': Name: C, total cost: -1, connections: [('L', 2), ('S', 3)],
# 'D': Name: D, total cost: -1, connections: [('F', 5), ('A', 4), ('B', 4)],
# 'E': Name: E, total cost: -1, connections: [('K', 5), ('G', 2)],
# 'F': Name: F, total cost: -1, connections: [('D', 5), ('H', 3)],
# 'G': Name: G, total cost: -1, connections: [('E', 2), ('H', 2)],
# 'H': Name: H, total cost: -1, connections: [('F', 3), ('B', 1), ('G', 2)],
# 'I': Name: I, total cost: -1, connections: [('L', 4), ('K', 4), ('J', 6)],
# 'J': Name: J, total cost: -1, connections: [('I', 6), ('L', 4), ('K', 4)],
# 'K': Name: K, total cost: -1, connections: [('K', 4), ('J', 4), ('E', 5), ('I', 4)],
# 'L': Name: L, total cost: -1, connections: [('J', 4), ('C', 2), ('I', 4)],
# 'S': Name: S, total cost: -1, connections: [('C', 3), ('A', 7), ('B', 2)]}

You will probably have to slightly adapt your Dijkstra class for this, but it should not make it any harder.

For the file opening and closing, you should use with and a if __name__ == "__main__": guard:

def main():
 with open("graph.txt") as file:
 graph = parse_graph(file)
 start_node = graph["S"]
 end_node = graph["E"]
 d = Dijkstra(graph, start_node, end_node)
 path = d.calculate_shortest_path()
 for i in path:
 print(i.name)
if __name__ == "__main__":
 main()

If you have control over the file format, you can take this assuming a node exists if it is mentioned to an extreme to cut down on the number of lines needed. Your graph.txt could then be (assuming all connections are still bi-directional and cost symmetric):

A, D, 4
A, B, 3
A, S, 7
B, S, 2
B, D, 4
B, H, 1
C, S, 3
C, L, 2
D, F, 5
E, G, 2
E, K, 5
F, H, 3
G, H, 2
I, L, 4
I, K, 4
I, J, 6
J, L, 4
J, K, 4

Thanks for the review. It's strange using Python after spending the school year using Java. I kinda miss explicitly specifying the types. The purpose of DGraph was to specify that it was a list of DNodes. I suppose I'll have to readjust to the Python ways.

Stack Exchange Network

Python: Dijkstra's Algorithm

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Python: Dijkstra's Algorithm

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions