(The entire project lives there.)
This time, I have made the previous version of the GA for TSP run faster by around the factor of 6+. The idea is to cache the tour costs with the actual tour objects and cache the hash codes of each tour.
GeneticTSPSolverV2.java
:
package com.github.coderodde.tsp.impl;
import com.github.coderodde.tsp.AllPairsShortestPathData;
import com.github.coderodde.tsp.Node;
import com.github.coderodde.tsp.Utils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import com.github.coderodde.tsp.TSPSolver;
/**
* This class implements the genetic (optimized) TSP solver.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (Apr 8, 2022)
* @since 1.6 (Apr 8, 2022)
*/
public final class GeneticTSPSolverV2 implements TSPSolver {
private static final class Tour {
final List<Node> nodes;
final double cost;
private final int hashCode;
Tour(List<Node> nodes, double cost) {
this.nodes = nodes;
this.cost = cost;
this.hashCode = nodes.hashCode();
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Tour)) {
return false;
}
Tour other = (Tour) o;
return nodes.equals(other.nodes);
}
}
// The number of parents of each individual (tour). This is trisexual.
private static final int NUMBER_OF_PARENTS = 3;
// The minimum number of generations. If 1 is passed, only a randomly
// (initial) generation will be returned.
private static final int MINIMUM_NUMBER_OF_GENERATIONS = 1;
// The minimum population size.
private static final int MINIMUM_POPULATION_SIZE = 5;
// The minimum number of vertices in the input graph. We need this in order
// to make sure that we have sufficient amount of individuals in each
// generation.
private static final int MINIMUM_GRAPH_SIZE = 4;
private static final int DEFAULT_NUMBER_OF_GENERATIONS = 10;
private static final int DEFAULT_POPULATION_SIZE = 10;
private final int numberOfGenerations;
private final int populationSize;
/**
* Constructs a genetic solver with given parameters.
*
* @param numberOfGenerations the number of generations.
* @param populationSize the population size at each generation.
*/
public GeneticTSPSolverV2(int numberOfGenerations,
int populationSize) {
checkNumberOfGenerations(numberOfGenerations);
checkPopulationSize(populationSize);
this.numberOfGenerations = numberOfGenerations;
this.populationSize = populationSize;
}
public GeneticTSPSolverV2() {
this(DEFAULT_NUMBER_OF_GENERATIONS,
DEFAULT_POPULATION_SIZE);
}
/**
* Returns an (approximate) solution to the TSP problem via genetic
* algorithm.
*
* @param seedNode the seed node of the graph.
* @return an approximate solution to the TSP problem instance.
*/
public TSPSolver.Solution findTSPSolution(Node seedNode) {
Objects.requireNonNull(seedNode, "The seed node is null.");
List<Node> reachableNodes =
GraphExpander.computeReachableNodesFrom(seedNode);
checkGraphSize(reachableNodes.size());
Random random = new Random();
AllPairsShortestPathData allPairsData =
AllPairsShortestPathSolver.solve(reachableNodes);
List<Tour> population =
computeInitialGeneration(reachableNodes,
populationSize,
random);
for (int generationNumber = 1;
generationNumber < numberOfGenerations;
generationNumber++) {
List<Tour> nextPopulation =
evolvePopulation(population, allPairsData, random);
population = nextPopulation;
}
Tour fittestTour = getFittestTour(population, allPairsData);
return new TSPSolver.Solution(fittestTour.nodes, allPairsData);
}
private static Tour getFittestTour(List<Tour> population,
AllPairsShortestPathData data) {
Tour fittestTour = null;
double fittestTourCost = Double.MAX_VALUE;
for (Tour tour : population) {
double tourCost = tour.cost;
if (fittestTourCost > tourCost) {
fittestTourCost = tourCost;
fittestTour = tour;
}
}
return fittestTour;
}
private static List<Tour>
evolvePopulation(List<Tour> population,
AllPairsShortestPathData data,
Random random) {
List<Tour> nextPopulation = new ArrayList<>(population.size());
while (nextPopulation.size() < population.size()) {
Tour[] parents = getParents(population, data, random);
nextPopulation.add(breedTour(parents, random, data));
}
return nextPopulation;
}
private static Tour breedTour(Tour[] parents,
Random random,
AllPairsShortestPathData data) {
int tourLength = parents[0].nodes.size();
int totalGeneLength = tourLength / NUMBER_OF_PARENTS;
int gene1Length = totalGeneLength;
int gene2Length = totalGeneLength;
int gene3Length = tourLength - gene1Length - gene2Length;
List<Node> tour = new ArrayList<>(totalGeneLength);
List<Node> genes1 = new ArrayList<>(gene1Length);
List<Node> genes2 = new ArrayList<>(gene2Length);
List<Node> genes3 = new ArrayList<>(gene3Length);
Set<Node> nodeSet = new HashSet<>();
for (int i = 0; i < gene1Length; ++i) {
Node node = parents[0].nodes.get(i);
nodeSet.add(node);
genes1.add(node);
}
int index = 0;
while (genes2.size() < gene2Length) {
Node node = parents[1].nodes.get(index++);
if (!nodeSet.contains(node)) {
nodeSet.add(node);
genes2.add(node);
}
}
index = 0;
while (genes3.size() < gene3Length) {
Node node = parents[2].nodes.get(index++);
if (!nodeSet.contains(node)) {
nodeSet.add(node);
genes3.add(node);
}
}
List<List<Node>> genes = new ArrayList<>(NUMBER_OF_PARENTS);
genes.add(genes1);
genes.add(genes2);
genes.add(genes3);
Collections.<List<Node>>shuffle(genes, random);
tour.addAll(genes.get(0));
tour.addAll(genes.get(1));
tour.addAll(genes.get(2));
return new Tour(tour, Utils.getTourCost(tour, data));
}
private static double getMaximumTourCost(List<Tour> population,
AllPairsShortestPathData data) {
double fittestTourCost = -1.0;
for (Tour tour : population) {
fittestTourCost = Math.max(fittestTourCost, tour.cost);
}
return fittestTourCost;
}
private static Tour[] getParents(List<Tour> population,
AllPairsShortestPathData data,
Random random) {
ProbabilityDistribution<Tour> distribution =
new ProbabilityDistribution<>(random);
double maximumTourCost = getMaximumTourCost(population, data);
for (Tour tour : population) {
double tourWeight = 1.2 * maximumTourCost - tour.cost;
distribution.addElement(tour, tourWeight);
}
Tour[] parents = new Tour[NUMBER_OF_PARENTS];
Set<Tour> parentFilter = new HashSet<>();
int index = 0;
while (parentFilter.size() < NUMBER_OF_PARENTS) {
Tour parent = distribution.sampleElement();
if (!parentFilter.contains(parent)) {
parentFilter.add(parent);
distribution.removeElement(parent);
parents[index++] = parent;
}
}
return parents;
}
private static List<Tour>
computeInitialGeneration(List<Node> reachableNodes,
int populationSize,
Random random) {
List<Tour> initialGeneration = new ArrayList<>(populationSize);
for (int i = 0; i < populationSize; ++i) {
List<Node> nodes = new ArrayList<>(reachableNodes);
Collections.shuffle(nodes, random);
Tour tour = new Tour(nodes, 0.0);
initialGeneration.add(tour);
}
return initialGeneration;
}
protected static void checkNumberOfGenerations(int numberOfGenerations) {
if (numberOfGenerations < MINIMUM_NUMBER_OF_GENERATIONS) {
throw new IllegalArgumentException(
"Number of generations is too small: "
+ numberOfGenerations
+ ". Must be at least "
+ MINIMUM_NUMBER_OF_GENERATIONS
+ ".");
}
}
protected static void checkPopulationSize(int populationSize) {
if (populationSize < MINIMUM_POPULATION_SIZE) {
throw new IllegalArgumentException(
"Population size is too small: "
+ populationSize
+ ". Must be at least "
+ MINIMUM_POPULATION_SIZE
+ ".");
}
}
protected static void checkGraphSize(int graphSize) {
if (graphSize < MINIMUM_GRAPH_SIZE) {
throw new IllegalArgumentException(
"The graph size is "
+ graphSize
+ ". Minimum allowed size is "
+ MINIMUM_GRAPH_SIZE
+ "."
);
}
}
}
Typical output
On my PC, I get the following output:
GA duration: 712 ms.
Brute-force duration: 8127 ms.
---
GA cost: 1298.5923370671355
Brute-force cost: 1187.226303336163
Cost ratio: 1.093803543113919
Critique request
As always, I would like to hear any comments on how to improve my code.
2 Answers 2
List<Tour> nextPopulation =
evolvePopulation(population, allPairsData, random);
population = nextPopulation;
The nextPopulation
variable doesn't serve a purpose. This is the same:
population = evolvePopulation(population, allPairsData, random);
In:
private static Tour getFittestTour(List<Tour> population,
AllPairsShortestPathData data) {
Parameter data
is unused.
Consider using streams.
E.g. your ParallelGeneticTSPSolver class has:
Tour getMinimalTour(List<Tour> population) {
Tour minimalTour = null;
double minimalTourCost = Double.POSITIVE_INFINITY;
for (Tour tour : population) {
double tentativeTourCost = tour.cost;
if (minimalTourCost > tentativeTourCost) {
minimalTourCost = tentativeTourCost;
minimalTour = tour;
}
}
return minimalTour;
}
It appears to be the same as getFittestTour
You only need this once, and it can be rewritten using the Stream API in a much more concise way:
Tour getMinimalTour(List<Tour> population) {
return population.stream()
.min((a, b) -> Double.compare(a.cost, b.cost))
.get();
}
ParallelGeneticTSPSolver also has:
private double getMaximumTourCost(List<Tour> population,
AllPairsShortestPathData data) {
double fittestTourCost = -1.0;
for (Tour tour : population) {
fittestTourCost = Math.max(fittestTourCost, tour.cost);
}
return fittestTourCost;
}
which also has an unused parameter data
and could be simplified greatly using the stream APIs:
private double getMaximumTourCost(List<Tour> population) {
return population.stream().mapToDouble(t -> t.cost).max().getAsDouble();
}
This code:
for (int i = 0; i < gene1Length; ++i) {
Node node = parents[0].nodes.get(i);
nodeSet.add(node);
genes1.add(node);
}
int index = 0;
while (genes2.size() < gene2Length) {
Node node = parents[1].nodes.get(index++);
if (!nodeSet.contains(node)) {
nodeSet.add(node);
genes2.add(node);
}
}
index = 0;
while (genes3.size() < gene3Length) {
Node node = parents[2].nodes.get(index++);
if (!nodeSet.contains(node)) {
nodeSet.add(node);
genes3.add(node);
}
}
is a little awkward. You could decrement the gene[2/3]Length variable when you add nodes and break when it is zero, instead of calling the size() method every iteration.
(This way you only need the comparison for iterations that actually change the size.):
for (;;) {
Node node = parents[2].nodes.get(index++);
if (!nodeSet.contains(node)) {
nodeSet.add(node);
genes3.add(node);
if (--gene3Length == 0)
break;
}
}
I assume something in the algorithm assures that index
won't go out of bounds.
If you are trying to tune this to get the very best performance, I suggest making a benchmark with JMH before you start tweaking.
Some other things to try for speed ups, since now we break the loop differently, we can use for-each (no need to dereference parents array inside the loop, no need for index variable at all)
for(Node node : parent[2].nodes) {
if (!nodeSet.contains(node)) {
nodeSet.add(node);
genes3.add(node);
if (--gene3Length == 0)
break;
}
}
Here again the stream APIs would make this easier to read.
Is this not equivalent (I'm assuming that the nodes list for each parent never has duplicate nodes, so nodeSet
doesn't need to be updated on-the-fly):
List<Node> genes1 = new ArrayList<>(parents[0].nodes.subList(0, gene1Length));
nodeSet.addAll(genes1);
List<Node> genes2 = parents[1].nodes.stream()
.filter(n -> !nodeSet.contains(n))
.limit(gene2Length).toList();
nodeSet.addAll(genes2);
List<Node> genes3 = parents[2].nodes.stream()
.filter(n -> !nodeSet.contains(n))
.limit(gene3Length).toList();
nodeSet.addAll(genes3);
Source code should be (doc)commented - especially interfaces (more than just
interface
s).Terminology: Some insist TSP do be defined on complete graphs, directed (asymmetrical TSP) or not.
en.wikipedia argues compellingly:adding a sufficiently long edge will complete the graph without affecting the optimal tour
Using shortest path cost independent of a direct edge may be turning the problem solved into something not true TSP.
GeneticTSPSolverV2.java - is that a variation(/version?!?) designation in the name of a file/public class?
Judging from the co-existence of GeneticTSPSolverV1 and the amount of common code, it is.
Copy&paste is not the way to go about variations.
The name might indicate unseparated concerns:- genetic solver (I briefly looked for generic Java implementations:
(initial population), fitness evaluation, crossover, mutation...)
Using a generic one would make overlooking extreme choices of crossover and mutation ratios less likely:
Not including specimen from one generation in the next, your crossover ratio is 1.
Imagine the optimal tour being found in the semi-final generation...
(I haven't come across a crossover procedure for TSP allowing for a commendably low mutation ratio.) - for TSP (so this might be a specialisation/derivation)
- V1/2 specialisation/derivation, if that
It may be interesting to see commonalities with other TSP solvers, say, Christofides&Serdyukov. Pity about Java&multiple inheritance.
- genetic solver (I briefly looked for generic Java implementations:
breedTour()
is quite asymmetric in the influence of parents to child(offspring?)
More often than not, I've seen attempts to alleviate this by creating one descendant per cyclic shift of parents (one per permutation sounds explosive).
It contains repetitive code.There are several instances of the antipattern
java.util.Set m; // or Map Object key; if (!m.contains(key)) { // just use if (m.add(key... m.add(key... }
evolvePopulation()
generates the same distribution viagetParents()
over and again - unfortunate division of labour (and potential cause for all those calls toUtils.getTourCost()
)
Remark on Brute Force:
With a permutation generation algorithm using a single swap of adjacent elements like Steinhaus–Johnson–Trotter–Even, the cost for a tour head→a→b→tail can be updated to the next tour head→b→a→tail by "subtracting the former arrows and adding the new" - independent of tour length.
Copy&paste not being the way to introduce variants, what is?
Starting from a class GeneticTSPSolver
, the obvious
/** basic implementation */
class GeneticTSPSolverBase extends GeneticTSPSolver {
public GeneticTSPSolverBase(int numberOfGenerations,
int populationSize,
Random random) {
super(numberOfGenerations,
populationSize,
random);
}
public GeneticTSPSolverBase() {
this(DEFAULT_NUMBER_OF_GENERATIONS,
DEFAULT_POPULATION_SIZE,
null);
}
...
}
/** variant: introduce <code>Tour</code> caching cost */
class GeneticTSPSolverV2 extends GeneticTSPSolver {
public GeneticTSPSolverV2(int numberOfGenerations,
int populationSize,
Random random) {
super(numberOfGenerations,
populationSize,
random);
}
public GeneticTSPSolverV2() { // super() is not the way...
this(DEFAULT_NUMBER_OF_GENERATIONS,
DEFAULT_POPULATION_SIZE,
null);
}
/** Keeps <code>elements</code> */
static class HashCodeCachingList<E> extends ArrayList<E>
{
private final int hashCode;
HashCodeCachingList(int initialCapacity) {
super(initialCapacity);
this.hashCode = super.hashCode();
}
HashCodeCachingList(E[] elements) {
super(elements.length);
for (E e: elements)
add(e);
this.hashCode = super.hashCode();
}
HashCodeCachingList(Collection<E> elements) {
super(elements);
this.hashCode = super.hashCode();
}
@Override
public int hashCode() { return hashCode; }
protected boolean _equals(Object other) {
return super.equals(other);
}
@Override
public boolean equals(Object other) {
return other instanceof HashCodeCachingList
&& super.equals(other);
}
}
/** Keeps <code>cost</code> */
private static class Tour extends HashCodeCachingList<Node>
{
final double cost;
Tour(Node[] elements, double cost) {
super(elements);
this.cost = cost;
}
Tour(Collection<Node> elements, double cost) {
super(elements); // I've seen this code before
this.cost = cost;
}
@Override
public boolean equals(Object other) {
return other instanceof Tour && super._equals(other);
}
}
...
}
(Thanks a bundle, Java, for constructors not being inherited. And arrays not being first class objects.)
I tinkered some with breedTour()
, one problem being there aren't too many parents between first and last, which might better be handled specially:
List<Node>
breedTour(List<Node>[] parents, List<Node>[] children, Random random) {
final int
nParents = parents.length,
tourLength = parents[0].size(),
floor = tourLength / nParents,
remainder = tourLength % nParents;
int []geneLengths = new int[nParents];
Arrays.fill(geneLengths, floor);
geneLengths[0] += remainder;
List<Node>
genes[] = new List[nParents];
Set<Node> // tune 1st trip? add() called, anyway
// checkset = java.util.Collections.emptySet(),
nodeSet = new HashSet<>(tourLength);
for (int i = 0 ; i < genes.length ; i++) {
int fromParent = geneLengths[i];
List<Node> gene = genes[i] = new ArrayList<>(fromParent);
for (Node node: parents[i])
if (// nParents-1 <= i || // try tune last trip
nodeSet.add(node)) {
gene.add(node);
if (--fromParent <= 0)
break;
}
}
// nodeSet.addAll(genes1);
// for (Node node: parents[1])
// if (nodeSet.add(node)) {
// genes2.add(node);
// if (ceilGeneLength <= genes2.size())
// break;
// }
// for (Node node: parents[2])
// if (nodeSet.add(node))
// genes3.add(node);
// if (ceilGeneLength != genes2.size())
// System.err.println("darn");
Collections.shuffle(Arrays.asList(genes), random);
// passing a Class to instantiate(Tour)/a Constructor got messy here...
// better return an array?
List<Node> tour = new ArrayList<>(tourLength);
for (Collection<Node> gene: genes)
tour.addAll(gene);
return tour;
}
With a non-zero survival ratio, I'm anything but sold on the concept of generations: It may be worth a try to do without.
-
\$\begingroup\$ (I tinkered with a pop size 50 — n¹·5, perhaps?) \$\endgroup\$greybeard– greybeard2022年04月11日 16:37:10 +00:00Commented Apr 11, 2022 at 16:37
Explore related questions
See similar questions with these tags.
breedTour()
is by no means equal, and I fail to quite see gene but forparents[0]
. Making the "most fragmented" last one the longest one looks odd. Adding tonodeSet
when building the last gene would only make sense if elements in the last parent weren't unique?! \$\endgroup\$