In order to get more accustomed with classes in Python, I have written a genetic algorithm, which takes a level with a start and end point and searches for a route (not necessarily the optimal one). The output shows the basic level and when a solution has been found, the level with the route:
Level:
############
O....#.....#
#.#.#.#.#..#
#........#.O
############
Solution:
############
O*...#.****#
#*#*#*#*#**#
#********#**
############
I would be interested in improvements of the structure of the code (i.e. not of the algorithm itself, only if there is an error), as I would like to improve my general knowledge of programming in Python.
There are some issues I am aware of:
- The parameters at the beginning could be written as enums, but I couldn't convince myself what the advantage would be (apart from polluting the global namespace?) I thought that the more concise way of writing "N" or "WALL" instead of "Direction.N" or "Object.Wall" added to the readability of the code.
- Class "Level": In principle, I would prefer that the attributes are read-only, but I am not sure how to define this properly. Also, I don't see the point of writing getters and setters here.
- In the same class, I didn't want to write __move_dict and __text_map twice in test_route and print_route, so I defined it as class variables. I am not sure if this is idiomatic at all.
- Similarly, test_route and print_route share the same code. I have been thinking if it would be possible to abstract away somehow the common loop, but I have no idea how to do this in Python.
""""Simple implementation of a genetic algorithm:
Searching for a possible route from a given start point
to an end point."""
import random
from dataclasses import dataclass
from typing import List
from collections import namedtuple
from operator import attrgetter
# PARAMETERS
# direction constants
N = 0
E = 1
S = 2
W = 3
# level constants
EMPTY = 0
WALL = 1
DOOR = 2
L1 = [[WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL],
[DOOR, EMPTY, EMPTY, EMPTY, EMPTY, WALL, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, WALL],
[WALL, EMPTY, WALL, EMPTY, WALL, EMPTY, WALL, EMPTY, WALL, EMPTY, EMPTY, WALL],
[WALL, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, WALL, EMPTY, DOOR],
[WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL, WALL]]
L1_WIDTH = 12
L1_HEIGHT = 5
# DATATYPES
Point = namedtuple("Point", "x y")
@dataclass
class Level:
"""Class for representing a level with a start and end point."""
map: list
width: int
height: int
start: Point
end: Point
__move_dict = {N: Point(0, 1),
E: Point(1, 0),
S: Point(0, -1),
W: Point(-1, 0)}
__text_map = {WALL: "#", EMPTY: ".", DOOR: "O"}
def test_route(self, genome):
"""Test a route encoded in a genome and return the final distance to the exit."""
def distance(point_a, point_b):
return abs(point_a.x - point_b.x) + abs(point_a.y - point_b.y)
position = self.start
for gene in genome.genes:
delta = self.__move_dict[gene]
new_pos = Point(position.x + delta.x,
position.y + delta.y)
if 0 <= new_pos.x < self.width:
if 0 <= new_pos.y < self.height:
if self.map[new_pos.y][new_pos.x] != WALL:
position = new_pos
if position == self.end:
break
return 1 / (1 + distance(position, self.end))
def print_level(self):
"""Print a text representation of a level."""
for row in self.map:
print("".join((self.__text_map[elem] for elem in row)))
def print_route(self, genome):
"""Print the route through the level."""
text_level = []
for row in self.map:
text_level.append([self.__text_map[elem] for elem in row])
position = self.start
for gene in genome.genes:
delta = self.__move_dict[gene]
new_pos = Point(position.x + delta.x,
position.y + delta.y)
if 0 <= new_pos.x < self.width:
if 0 <= new_pos.y < self.height:
if self.map[new_pos.y][new_pos.x] != WALL:
position = new_pos
text_level[new_pos.y][new_pos.x] = "*"
if position == self.end:
break
for row in text_level:
print("".join(row))
@dataclass
class Genome:
"""Class for representing the genome of running through a level."""
fitness: float
genes: List[int]
class GenomePool:
"""Class implementing the genetic algorithm."""
def __init__(self, level, pool_size, num_genes, crossover_rate, mutation_rate):
self.__level = level
self.__pool_size = pool_size
self.__num_genes = num_genes
self.__crossover_rate = crossover_rate
self.__mutation_rate = mutation_rate
self.__pool = [Genome(0, [random.randint(0, 3) for i in range(0, num_genes)])
for _ in range(self.__pool_size)]
self.__update_fitness()
def __select_genome(self):
"""Do a roulette wheel selection and return a genome."""
total_fitness = sum((genome.fitness for genome in self.__pool))
cut = random.uniform(0, total_fitness)
partial_fitness = 0
idx = 0
while partial_fitness < cut:
partial_fitness += self.__pool[idx].fitness
idx += 1
return self.__pool[idx] if idx < len(self.__pool) else self.__pool[self.__pool_size - 1]
def __crossover(self, mother, father):
"""Do a crossover of two genomes and return an offspring."""
if random.random() > self.__crossover_rate:
return mother
crossover_point = int(random.uniform(0, self.__num_genes))
offspring = Genome(0, [])
offspring.genes = mother.genes[0:crossover_point] + father.genes[crossover_point:]
return offspring
def __mutate(self, genome):
for i in range(self.__num_genes):
if random.random() < self.__mutation_rate:
genome.genes[i] = int(round(random.uniform(0, 3)))
def __update_fitness(self):
"""Update the fitness score of each genome."""
for genome in self.__pool:
genome.fitness = self.__level.test_route(genome)
def get_best_genome(self):
"""Return the genome with the best fitness."""
sorted_pool = sorted(self.__pool, key=attrgetter("fitness"), reverse=True)
return sorted_pool[0]
def run(self, verbose=False):
"""Run the genetic algorithm until a solution has been found."""
iteration = 0
while all((x.fitness != 1 for x in self.__pool)):
if verbose:
best_fitness = self.get_best_genome().fitness
print(f"Iteration {iteration}: Best fitness = {best_fitness}")
iteration += 1
self.step()
def step(self):
"""Run one time step of the evolution."""
new_pool = []
for i in range(self.__pool_size):
mother = self.__select_genome()
father = self.__select_genome()
offspring = self.__crossover(mother, father)
self.__mutate(offspring)
new_pool.append(offspring)
self.__pool = new_pool
self.__update_fitness()
def main():
level_one = Level(L1, L1_WIDTH, L1_HEIGHT, start=Point(0, 1),
end=Point(11, 3))
print("Level:")
level_one.print_level()
genome_pool = GenomePool(level_one, pool_size=30, num_genes=70,
crossover_rate=0.7, mutation_rate=0.01)
genome_pool.run()
print()
print("Solution:")
level_one.print_route(genome_pool.get_best_genome())
if __name__ == "__main__":
main()
2 Answers 2
Answers to your questions
The parameters at the beginning could be written as enums, but I couldn't convince myself what the advantage would be (apart from polluting the global namespace?) I thought that the more concise way of writing "N" or "WALL" instead of "Direction.N" or "Object.Wall" added to the readability of the code.
Enums are generally a good idea, since they have some nice properties. In particular, they are in their own distinctive class, and you cannot accidentily compare an enum with something that is not an enum. For example, in your code, both E
and WALL
are just 1
, so E == WALL
will result in True
, which is not what you would expect. So I would definitely use enums here.
Now, you are right that using enums results in more verbose code. But, you can still create variables with short names that you assign enums to, and get the best of both worlds. For example:
class Tile(enum.Enum):
EMPTY = 0
WALL = 1
DOOR = 2
EMPTY = Tile.EMPTY
WALL = Tile.WALL
DOOR = Tile.DOOR
L1 = [[WALL, WALL, ...], [DOOR, EMPTY, ...], ...]
Note that enums in Python don't require you to have numeric values, you can do the following:
class Direction(enum.Enum):
N = Point(0, 1)
E = Point(1, 0)
S = Point(0, -1)
W = Point(-1, 0)
class Tile(enum.Enum):
EMPTY = "."
WALL = "#"
DOOR = "O"
This then avoids the need for __move_dict
and __text_map
.
Class "Level": In principle, I would prefer that the attributes are read-only, but I am not sure how to define this properly. Also, I don't see the point of writing getters and setters here.
See this question for some possible answers.
In the same class, I didn't want to write __move_dict and __text_map twice in test_route and print_route, so I defined it as class variables. I am not sure if this is idiomatic at all.
This is perfectly fine! Avoiding repetition is very important in keeping your code concise and maintainable.
Similarly, test_route and print_route share the same code. I have been thinking if it would be possible to abstract away somehow the common loop, but I have no idea how to do this in Python.
You can create a generator that loops over the path, and for each point yields the position of that point. Then you can use that to simplify loops in test_route()
and print_route()
, like so:
def visit_route(self):
...
for gene in genome.genes:
...
position = new_pos
yield position
def test_route(self, genome):
last_position = self.start
for position in self.visit_route():
last_position = position
return 1 / (1 + distance(last_position, self.end))
def print_route(self):
text_level = [[self.__text_map[elem] for elem in row] for row in self.map]
for position in self.visit_route():
text_level[position.y][position.x] = "*")
for row in text_level:
print ("".join(row))
Avoid storing redundant information
Your class Level
stores width
and height
, but this information is already in map
: height
should be equal to len(map)
, and width
should be equal to len(map[0])
. While there might sometimes be reasons to keep copies of data that is expensive to calculate, the drawback is that you have to ensure the data is consistent. What if I create a Level([[EMPTY]], 100, 100)
?
Similarly, what happens if start_point
and end_point
don't match where the DOOR
s are in the map
? This one is perhaps more tricky. Consider creating a constructor for class Level
that checks whether the given parameters are consistent, or have it automatically derive width
, height
, start_point
and end_point
from the map
.
Representation
############
O....#.....#
#.#.#.#.#..#
#........#.O
############
I would find this representation of a level much more readable if the empty space was printed as space rather than .
############
O # #
# # # # # #
# # O
############
In the code, I would use the same representations as input to the program, so that rather than this
L1 = [[WALL, WALL, WALL, WALL, WALL,
You could define
L1 = [
"############",
"O # #",
"# # # # # #",
"# # O",
"############",
]
And then you would let some function translate that into whatever internal logic you need for your algorithm.
I would also change the symbol for the travelled path from *
to something else that is easier to visually distinguish from the #
used for walls. Perhaps change the walls too.
Code
if 0 <= new_pos.x < self.width:
if 0 <= new_pos.y < self.height:
if self.map[new_pos.y][new_pos.x] != WALL:
position = new_pos
This is not wrong, but it would typically be written using and
instead of several nested ifs, when you have no need for else
cases or other options.
Explore related questions
See similar questions with these tags.