In this code, I focused more on code readability rather than algorithmic complexity. I didn't really care about the code in the main
function because it was just used to test the code. Some of the classes have a D prefix to differentiate them from the regular Nodes
.
A few problems in particular I would like comments on:
- Storing the name of the node both in the dictionary key and the name field seems wrong. I would think it has some of the same problems as parallel lists.
- The
DGraph
class's only functionality is to be a wrapper for a list. On the otherhand, I like it because it is more verbose. - Should
connections
stay as(node, cost)
tuples or should I make a newDConnection
class? - I'm hoping to avoid external libraries
class DNode:
def __init__(self, name):
self.connections = []
self.name = name
self.total_cost = -1
self.last_connection = None
self.visited = False
def add_connection(self, node, cost):
self.connections.append((node, cost))
for connection in node.connections:
if connection[0] is self:
return
node.add_connection(self, cost)
class DGraph:
def __init__(self):
self.nodes = []
def new_node(self, name):
node = DNode(name)
self.nodes.append(node)
return node
class Dijkstra:
def __init__(self, graph, start_node, end_node):
self.graph = graph
self.start_node = start_node
self.end_node = end_node
self.dlist = DList(graph.nodes, start_node)
def calculate_shortest_path(self):
self.recursive_helper(self.start_node)
node = self.end_node
node_steps = []
while node != self.start_node:
node_steps.append(node)
node = node.last_connection
node_steps.append(self.start_node)
return reversed(node_steps)
def recursive_helper(self, current_node):
current_cost = current_node.total_cost
for connection in current_node.connections:
neighbor = connection[0]
if not neighbor.visited:
total_cost = current_cost + connection[1]
if neighbor.total_cost == -1 or total_cost < neighbor.total_cost:
neighbor.total_cost = total_cost
neighbor.last_connection = current_node
current_node.visited = True
if self.end_node.visited:
return
self.recursive_helper(self.dlist.get_smallest_unvisited_dnode())
class DList:
def __init__(self, node_list, start_node):
self.nodes = list(node_list)
start_node.total_cost = 0
def get_smallest_unvisited_dnode(self):
smallest = None
for node in self.nodes:
if not node.visited and node.total_cost != -1:
if smallest is None or node.total_cost < smallest.total_cost:
smallest = node
return smallest
class GraphParser:
def __init__(self, unparsed_string):
self.unparsed_string = unparsed_string
self.nodes = {}
self.graph = DGraph()
def create_nodes(self):
for line in self.unparsed_string.split("\n"):
if line[0:5] == "node ":
node_name = line[5:]
node = self.graph.new_node(node_name)
self.nodes[node_name] = node
def create_connections(self):
for line in self.unparsed_string.split("\n"):
if line[0:5] == "node ":
node_name = line[5:]
node = self.nodes[node_name]
elif line[0] == "\t":
connection_data = line[1:].split(",")
neighbor_name = connection_data[0].strip()
if len(connection_data) > 0:
cost = int(connection_data[1].strip())
else:
cost = 1
neighbor = self.nodes[neighbor_name]
node.add_connection(neighbor, cost)
def get_graph(self):
self.create_nodes()
self.create_connections()
return self.graph
def main():
filename = "graph.txt"
f = open(filename, "r")
content = f.read()
f.close()
gp = GraphParser(content)
graph = gp.get_graph()
start_node = gp.nodes["S"]
end_node = gp.nodes["E"]
d = Dijkstra(graph, start_node, end_node)
path = d.calculate_shortest_path()
for i in path:
print(i.name)
main()
Sample graph.txt
node A
D, 4
B, 3
S, 7
node B
S, 2
A, 3
D, 4
H, 1
node C
S, 3
L, 2
node D
A, 4
B, 4
F, 5
node E
K, 5
G, 2
node F
H, 3
D, 5
node G
H, 2
E, 2
node H
B, 1
F, 3
G, 2
node I
L, 4
J, 6
K, 4
node J
L, 4
I, 6
K, 4
node K
I, 4
K, 4
node L
I, 4
J, 4
node S
A, 7
B, 2
C, 3
1 Answer 1
I think you are using classes where you don't need to. Your DGraph
class is one (as you have already half-realized) but your GraphParser
is another. You even have to parse the file content twice because of your separation of concerns. In my opinion it would be way easier if this was a simple function taking an iterable of lines (such as an open file) and creating a dict
of Node
s from this (mapping the name to the node):
class Node:
def __init__(self, name):
self.connections = set()
self.name = name
self.total_cost = -1
self.last_connection = None
self.visited = False
def __str__(self):
return f"Name: {self.name}, total cost: {self.total_cost}, connections: {[(node.name, cost) for node, cost in self.connections]}"
__repr__ = __str__
def parse_graph(lines):
graph = {}
for line in lines:
if line.startswith("node "):
name = line[5:]
# Get the node if it already exists, otherwise create a new node
node = graph.setdefault(name, Node(name))
elif line.startswith(("\t", " "*4)): # some editors might replace \t with 4 spaces, careful!
name2, cost = line.strip().split(",")
# Get the other node if it already exists, otherwise create a new node
node2 = graph.setdefault(name2, Node(name2))
# add connection manually, no need to iterate over all existing connections
# reverse connection added in definition of node2
node.connections.add((node2, int(cost))) # int can deal with whitespace
else:
raise ValueError(f"Cannot parse line: {line}")
return graph
The crucial part here is adding a node when you see it the first time (even just as a target of a connection) and getting the same node back again when it already exists. For this I used dict.setdefault
, which is functionally equivalent to:
if name not in graph:
graph[name] = Node(name)
node = graph[name]
Currently there is no sanity check that all nodes that are referenced as targets of connections are actually defined, you might want to add that.
It also uses the fact that Node.connections
is a set
, so adding an already exisitng conenction does not hurt, because the tuple (Node, cost)
is hashable (using the id
of `Node).
For your example file this returns this graph structure:
# {'A': Name: A, total cost: -1, connections: [('S', 7), ('D', 4), ('B', 3)],
# 'B': Name: B, total cost: -1, connections: [('H', 1), ('D', 4), ('S', 2), ('A', 3)],
# 'C': Name: C, total cost: -1, connections: [('L', 2), ('S', 3)],
# 'D': Name: D, total cost: -1, connections: [('F', 5), ('A', 4), ('B', 4)],
# 'E': Name: E, total cost: -1, connections: [('K', 5), ('G', 2)],
# 'F': Name: F, total cost: -1, connections: [('D', 5), ('H', 3)],
# 'G': Name: G, total cost: -1, connections: [('E', 2), ('H', 2)],
# 'H': Name: H, total cost: -1, connections: [('F', 3), ('B', 1), ('G', 2)],
# 'I': Name: I, total cost: -1, connections: [('L', 4), ('K', 4), ('J', 6)],
# 'J': Name: J, total cost: -1, connections: [('I', 6), ('L', 4), ('K', 4)],
# 'K': Name: K, total cost: -1, connections: [('K', 4), ('J', 4), ('E', 5), ('I', 4)],
# 'L': Name: L, total cost: -1, connections: [('J', 4), ('C', 2), ('I', 4)],
# 'S': Name: S, total cost: -1, connections: [('C', 3), ('A', 7), ('B', 2)]}
You will probably have to slightly adapt your Dijkstra
class for this, but it should not make it any harder.
For the file opening and closing, you should use with
and a if __name__ == "__main__":
guard:
def main():
with open("graph.txt") as file:
graph = parse_graph(file)
start_node = graph["S"]
end_node = graph["E"]
d = Dijkstra(graph, start_node, end_node)
path = d.calculate_shortest_path()
for i in path:
print(i.name)
if __name__ == "__main__":
main()
If you have control over the file format, you can take this assuming a node exists if it is mentioned to an extreme to cut down on the number of lines needed. Your graph.txt
could then be (assuming all connections are still bi-directional and cost symmetric):
A, D, 4
A, B, 3
A, S, 7
B, S, 2
B, D, 4
B, H, 1
C, S, 3
C, L, 2
D, F, 5
E, G, 2
E, K, 5
F, H, 3
G, H, 2
I, L, 4
I, K, 4
I, J, 6
J, L, 4
J, K, 4
-
\$\begingroup\$ Thanks for the review. It's strange using Python after spending the school year using Java. I kinda miss explicitly specifying the types. The purpose of DGraph was to specify that it was a list of DNodes. I suppose I'll have to readjust to the Python ways. \$\endgroup\$jkd– jkd2018年07月19日 21:02:39 +00:00Commented Jul 19, 2018 at 21:02