Genetic Algorithm in Python

Question 1

I'm a new programmer, so any help is welcome. Preferably to make it faster, avoid heavy memory usage, and so on.

#! /usr/bin/env python
"""
 This module is a frame work for a Genetic Algorithm.
 :param GenePool: See this documentation for how to use this module..
 :type GenePool: GenePool
 Author: Fernando Rodrigues dos Santos
"""
import random
import math
import pickle
import itertools
import sys
class GenePool():
 """"
 Place a gene in the desired program using GenePool.nextGene().
 Place the genes variables(Gene.dna[id]) in the desired program.
 Reward good choices.
 Punish bad choices.
 Call GenePool.return_result() and pass the gene and the result to start the GA.
 After this the Genetic Algorithm will:
 - Save the top genes without modification and duplicate them(the amount is set by the elitism).
 - Remove the random genes to fit the population(the amount is set by the elitism).
 - Randomly choose genes based on their rank(roulette-wheel selection).
 - Cross-over the selected genes using the two-points cut method.(the amount of genes crossed is set by the
 cross_coefficient).
 - Finally it will apply the mutation to each gene(expect the top-rated).
 - It will repeat until find a convergence or stop if maxIteration is set.
 :param sample: The pattern of genes to be created.
 :type sample: list
 :param population: How many genes will be in the gene_pool.
 :type population: int
 :param cross_coefficient: The percentage of genes to do a random cross-over.
 :type cross_coefficient: float
 :param elitism: The amount of best-fitted genes to remain intact.
 :type elitism: float
 :param mutation: The chance that a dna will randomly mutate.
 :type mutation: float
 :param max_iterations: How many times it will try to find a convergence, if -1 it will run indefinitely(default).
 :type max_iterations: int
 :param save: File to save the pool.
 :type save: str
 """
 def __init__(self, sample, population, cross_coefficient=0.1, elitism=0.05, mutation=0.05, max_iterations=-1,
 save="oldDNA.txt"):
 """
 The generated genes will be based on the sample;
 - bool, True or False
 - int, random number between 0 and1024
 - float, random number between 0.0 and 1.0
 - str, a combination of one letter(a-z) and one number(0-9). E.gene: "a1", "b2", "c2"..."z9".
 Population is the size of the gene_pool.
 See more in the class documentation.
 """
 self.sample = sample
 self.population = population
 self.genes = {}
 self.new_gene_pool = []
 self.cross_coefficient = cross_coefficient
 self.elitism = elitism
 self.mutation = mutation
 self.max_iterations = max_iterations
 self.current_iteration = 0
 self.total_result = 0
 self.calls = -1
 self.save = save
 self.convergence = False
 self.new_genes()
 def new_genes(self):
 while len(self) != self.population:
 dna = []
 for chromosome in self.sample:
 if chromosome == bool:
 dna.append(bool(random.getrandbits(1)))
 elif chromosome == int:
 dna.append(random.randint(0, 2 ** 10))
 elif chromosome == float:
 dna.append(random.random())
 gene = Genes(self, '%i' % (len(self) + 1), dna)
 self[gene.name] = gene
 def next_gene(self):
 """
 :returns: A gene to be used.
 :rtype: Genes
 """
 self.calls += 1
 if self.calls >= self.population:
 self.calls = 0
 self.start_genetic_algorithm()
 return self[self.calls]
 def replace_gene(self, old_gene, new_dna, result, rank):
 new_gene = Genes(self, old_gene.name, new_dna, result=result, rank=rank)
 self[old_gene.name] = new_gene
 self.new_gene_pool.append(new_gene)
 def rated_list(self, cut):
 rated_list = sorted([(gene.rank, gene) for _, gene in self.genes.iteritems()])[::-1]
 selected = int(math.ceil(len(self) * self.elitism))
 top_genes = rated_list[0:selected]
 mid_genes = rated_list[selected:len(self) - selected]
 genes = None
 if cut == 'top':
 genes = top_genes
 elif cut == 'mid':
 genes = mid_genes
 return genes
 def duplicate_top(self):
 top_genes = self.rated_list('top')
 for gene in top_genes:
 gene = gene[1]
 gene.rank /= 2
 gene.result /= 2
 new_gene = Genes(self, "%i" % (len(self) + 1), gene.dna, gene.result, gene.rank)
 self[new_gene.name] = new_gene
 self.new_gene_pool.append(new_gene)
 gene.lock = True
 self.new_gene_pool.append(gene)
 def mutate_mid_genes(self):
 for gene in self:
 new_dna = []
 for chromosome in gene.dna:
 rand = random.random()
 if rand <= self.mutation:
 if type(chromosome) == bool:
 new_dna.append(bool(random.getrandbits(1)))
 if type(chromosome) == int:
 new_dna.append(random.randint(0, 2 ** 10))
 if type(chromosome) == float:
 new_dna.append(random.random())
 else:
 new_dna.append(chromosome)
 gene.dna = new_dna
 def cross_genes(self):
 ranks = [gene.rank for gene in self.genes.itervalues()]
 min_rank = min(ranks)
 max_rank = max(ranks)
 max_tries = 0
 while len(self.new_gene_pool) < self.population or max_tries == 100:
 max_tries += 1
 gene1 = None
 gene2 = None
 while gene1 is None or gene2 is None:
 rand = random.uniform(min_rank, max_rank)
 gene1 = [gene for gene in self.genes.itervalues() if gene.rank >= rand and not gene.lock and gene
 .rank != 1]
 if gene1:
 gene1 = random.choice(gene1)
 pass
 else:
 gene1 = None
 rand = random.uniform(min_rank, max_rank)
 gene2 = [gene for gene in self.genes.itervalues() if gene.rank >= rand and not gene.lock and gene !=
 gene1 and gene.rank != 1]
 if gene2:
 gene2 = random.choice(gene2)
 else:
 gene2 = None
 self.crossover(gene1, gene2)
 for gene in self:
 gene.lock = False
 def crossover(self, gene1, gene2):
 """Two-Point cross-over"""
 cross_point1, cross_point2 = None, None
 while not cross_point2 and not cross_point1:
 cross_point1 = random.randint(0, len(gene1))
 cross_point2 = random.randint(0, len(gene1))
 if cross_point1 == cross_point2 or cross_point1 > cross_point2:
 cross_point1, cross_point2 = None, None
 new_dna1 = gene1[:cross_point1] + gene2[cross_point1:cross_point2] + gene1[cross_point2:]
 new_dna2 = gene2[:cross_point1] + gene1[cross_point1:cross_point2] + gene2[cross_point2:]
 result = (gene1.result + gene2.result) / 2
 rank = (gene1.rank + gene2.rank) / 2
 self.replace_gene(gene1, new_dna1, result, rank)
 self.replace_gene(gene2, new_dna2, result, rank)
 def return_result(self, gene, result):
 """Return here the gene, it's fitness result. Set force to True to run the GA in the gene_pool
 :param gene: The gene which was run.
 :type gene: Genes
 :param result: The fitness result.
 :type result: int
 """
 gene.result += result
 self.total_result += gene.result
 def test_convergence(self, print_result=False):
 if print_result:
 pro = 0
 con = 0
 for gene1, gene2 in itertools.combinations(self.genes.itervalues(), 2):
 if gene1.dna == gene2.dna:
 pro += 1
 else:
 con += 1
 cc = (con + 1) / (pro + 1)
 print "Converged rate: 1:%i" % cc
 if self.convergence or self.current_iteration == self.max_iterations:
 for gene in self:
 print "Convergence Found: \nGene: %s\nDNA: %s" % (gene.name, gene.dna)
 sys.exit("End of the line")
 def reset_genes(self):
 for gene in self:
 gene.rank = 0
 gene.result = 0
 gene.lock = False
 self.total_result = 0
 def save_results(self):
 f = open(self.save, 'w')
 pickle.dump(self, f)
 f.close()
 def update_result(self):
 self.total_result = 0
 self.genes = {}
 for gene in self.new_gene_pool:
 new_gene = Genes(self, '%i' % (len(self.genes) + 1), gene.dna)
 self.genes[new_gene.name] = new_gene
 self.new_gene_pool = []
 def start_genetic_algorithm(self):
 [gene.update_rank() for gene in self.genes.itervalues()]
 self.current_iteration += 1
 self.duplicate_top()
 self.cross_genes()
 self.update_result()
 self.test_convergence(print_result=True)
 self.mutate_mid_genes()
 # self.saveResults()
 return self.convergence
 def __getitem__(self, item):
 if type(item) == str:
 return self.genes[item]
 if type(item) == int:
 return self[sorted(self.genes)[item]]
 def __setitem__(self, key, value):
 self.genes[key] = value
 def __len__(self):
 return len(self.genes)
class Genes():
 """
 A gene which mutates based on it's gene_pool settings
 :param gene_pool: Where all genes are stored and the GA will run
 :type gene_pool: GenePool
 :param dna: The core of the gene, it's based on the sample set on the gene_pool.
 :type dna: list
 :param result: The current fitness result of the gene.
 :type result: int
 :param rank: The result from 0.0 to 1.0 of the gene in the gene_pool.
 :type rank: float
 """
 def __init__(self, gene_pool, name, dna, result=0, rank=0):
 """should be instantiated by genePool.newGenes()"""
 self.name = name
 self.gene_pool = gene_pool
 self.dna = dna
 self.result = result
 self.rank = rank
 self.lock = False
 def update_rank(self):
 self.rank = self.result / float(self.gene_pool.total_result)
 return self.rank
 def __len__(self):
 return len(self.dna)
 def __getitem__(self, item):
 return self.dna[item]
gene_pool = GenePool([int] * 3, 100)
while True:
 gene = gene_pool.next_gene()
 dna = gene.dna
 result = 1
 if dna[0] <= 100:
 result += 10
 if dna[0] == 100:
 result += 100
 if dna[1] >= 200:
 result += 10
 if dna[1] == 200:
 result += 100
 if 100 <= dna[2] <= 200:
 result += 10
 if dna[2] == 150:
 result += 100
 if dna[0] == 100 and dna[1] == 200 and dna[2] == 150:
 gene_pool.convergence = True
 gene_pool.return_result(gene, result)

Question 2

Is there a specific performance issue you've noticed? Have you tried anything to make it faster and/or reduce memory footprint? Any profiling? Do you have an example we can run?

Question 3

If I use a population of 100 and my the lenght of the dna is 10 it takes around 30 seconds for a calculation. I'm trying to shrink uses of for loops and the like, but I'm not very used to any other ways of reducing performance hits. Haven't done any test on memory. I don't know how to do it.

Question 4

You should start by profiling to determine where exactly the time is being taken: see the profile module. You can also do memory profiling, although not from the standard library; see e.g. memory_profiler.

Question 5

As a side note: Your code doesn't follow PEP 8. You can use pep8.py (and autopep8.py), which is also included in PyDev, to check where you don't conform.

Question 6

Sorry about that. I removed the commented out remove_to_fit method but had forgot to remove when I I was calling. It's working now.

Question 7

One thing you should do is not place code you always want run at the lowest level in the file, you should place it in a function called main() and call main() with if __name__ == "__main__":. This will help you when you start creating modules and importing them, and is discussed here.

Other than this, your code is very neat, and it is good that it follows PEP-8 standards. I cannot comment on the performance as I do not have Python in my system at the time of writing this.

user34073user34073 · Answer 1 · 2015-03-14 21:52:26Z

One thing you should do is not place code you always want run at the lowest level in the file, you should place it in a function called main() and call main() with if __name__ == "__main__":. This will help you when you start creating modules and importing them, and is discussed here.

Other than this, your code is very neat, and it is good that it follows PEP-8 standards. I cannot comment on the performance as I do not have Python in my system at the time of writing this.

Stack Exchange Network

Genetic Algorithm in Python

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Genetic Algorithm in Python

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions