Here is my code for Tarjan's Strongly Connected Components algorithm. Please point out any bugs, performance/space (algorithm time/space complexity) optimization or code style issues.
from collections import defaultdict
class SccGraph:
def __init__(self, vertex_size):
self.out_neighbour = defaultdict(list)
self.vertex = set()
self.visited = set()
self.index = defaultdict(int)
self.low_index = defaultdict(int)
self.global_index = 0
self.visit_stack = []
self.scc = []
def add_edge(self, from_node, to_node):
self.vertex.add(from_node)
self.vertex.add(to_node)
self.out_neighbour[from_node].append(to_node)
def dfs_graph(self):
for v in self.vertex:
if v not in self.visited:
self.dfs_node(v)
def dfs_node(self, v):
# for safe protection
if v in self.visited:
return
self.index[v] = self.global_index
self.low_index[v] = self.global_index
self.global_index += 1
self.visit_stack.append(v)
self.visited.add(v)
for n in self.out_neighbour[v]:
if n not in self.visited:
self.dfs_node(n)
self.low_index[v] = min(self.low_index[v], self.low_index[n])
elif n in self.visit_stack:
self.low_index[v] = min(self.low_index[v], self.index[n])
result = []
if self.low_index[v] == self.index[v]:
w = self.visit_stack.pop(-1)
while w != v:
result.append(w)
w = self.visit_stack.pop(-1)
result.append(v)
self.scc.append(result)
if __name__ == "__main__":
g = SccGraph(5)
# setup a graph 1->2->3 and 3 -> 1 which forms a scc
# setup another two edges 3->4 and 4->5
g.add_edge(1,2)
g.add_edge(2,3)
g.add_edge(3,1)
g.add_edge(3,4)
g.add_edge(4,5)
g.dfs_graph()
print g.scc
1 Answer 1
Please point out any ... code style issues.
Certainly.
Layout
The code could use some breathing space in terms of blank lines between functions.
The black program can be used to automatically reformat the code.
Portability
I realize this question was posted many years ago when Python version 2.x was prevalent, but now that it is deprecated, consider porting to 3.x. All that is needed is to change:
print g.scc
to:
print(g.scc)
Documentation
The PEP 8 style guide recommends adding docstrings for classes and functions. For example, you could expand acronyms:
class SccGraph:
"""Strongly Connected Components (SCC) algorithm"""
def dfs_graph(self):
"""Depth-first Search (DFS) graph"""
For the functions, it would also be helpful to describe the input and return types in the docstrings.
Lint check
pylint identified the following unused variable:
vertex_size
in this line:
def __init__(self, vertex_size):
You could simplify the code by removing the variable:
def __init__(self):
g = SccGraph()
Naming
Many of the variable names are descriptive, but some could be improved. For example:
for v in self.vertex:
Since vertex
is an array, it would be better pluralized (vertices
).
Since v
is vague, it could be vertex
:
for vertex in self.vertices:
-
\$\begingroup\$ What's the advantage of describing the input and return types in the docstrings instead of with function annotations? \$\endgroup\$Kelly Bundy– Kelly Bundy2025年07月21日 16:35:09 +00:00Commented Jul 21 at 16:35