I am trying to implement a genetic algorithm to solve (Diophantine Equation).
For instance, a + 2b + 3c + 4d = 90 where a, b, c, d are positive integers.
After reading some books and following tutorials, I finally wrote this code, but as I am new to programming and GA, I don't know if this is a good implementation.
#include<iostream>
#include <string>
#include <vector>
#include <time.h>
#include <algorithm>
using namespace std;
#define variable 4
#define chromoSize 100000
float total = 0;
struct Equation
{
int eq[variable];
float ev;
float fit;
float p;
};
void init_chrom(vector<Equation> &Original, vector<Equation> &Temp)
{
for(int i=0; i<chromoSize; i++)
{
Equation e;
for(int j=0; j<variable; j++)
{
e.eq[j] = rand() % 30;
e.fit = 0;
}
Original.push_back(e);
}
Temp.resize(chromoSize);
}
void calc_fit(vector<Equation> &orginalCh)
{
for(int i=0; i<chromoSize; i++)
{
int j = 0;
orginalCh[i].ev = abs((orginalCh[i].eq[j] + 2*orginalCh[i].eq[j+1] +
3*orginalCh[i].eq[j+2] + 4*orginalCh[i].eq[j+3]) - 10);
}
}
void select_fit(vector<Equation> &orginalCh)
{
for(int i=0; i<chromoSize; i++)
{
orginalCh[i].fit = 1 /( 1 + orginalCh[i].ev);
total += orginalCh[i].fit;
}
float pro = 0;
for(int i=0; i<chromoSize; i++)
{
pro += orginalCh[i].fit / total;
orginalCh[i].p = pro;
}
}
void _copy(vector<Equation> &p1, vector<Equation> &p2, int s)
{
for(int i=0; i<s; i++)
{
for(int j=0; j<variable; j++)
p2[i].eq[j] = p1[i].eq[j];
p2[i].fit = p1[i].fit;
}
}
void mutate(vector<Equation> &parent2, int i)
{
int size = variable;
int index1 = rand() % size;
int index2 = rand() % size;
int j = rand() % chromoSize;
parent2[i].eq[index1] = parent2[j].eq[index2];
}
void mate(vector<Equation> &parent1, vector<Equation> &parent2)
{
int sub, p1, p2, tSsize = variable;
_copy(parent1, parent2, chromoSize);
for(int i=0; i<chromoSize; i++)
{
p1 = rand() % chromoSize;
p2 = rand() % chromoSize;
sub = rand() % tSsize;
for(int j=sub; j<variable; j++)
parent2[p2].eq[j] = parent1[p1].eq[j];
if(parent2[i].ev > 0.1f)
mutate(parent2, i);
}
}
bool sort_fitness(Equation x, Equation y)
{
return(x.fit > y.fit);
}
void _sort(vector<Equation> &orginalCh)
{
sort(orginalCh.begin(),orginalCh.end(),sort_fitness);
}
void swap(vector<Equation> *&parent1, vector<Equation> *&parent2)
{
vector<Equation> *temp = parent1;
parent1 = parent2;
parent2 = temp;
}
void print(vector<Equation> &originalCh)
{
for(int i=0; i<variable; i++)
{
cout<<originalCh[0].eq[i]<<", ";
}
cout<<" "<<originalCh[0].ev<<" "<<originalCh[0].fit;
cout<<endl;
}
int main()
{
srand(unsigned(time(NULL)));
vector<Equation> chromO, chromT;
vector<Equation> *originalCh, *bufferCh;
init_chrom(chromO, chromT);
originalCh = &chromO;
bufferCh = &chromT;
for(int i=0; i<2000; i++)
{
calc_fit(*originalCh);
select_fit(*originalCh);
_sort(*originalCh);
print(*originalCh);
if((*originalCh)[0].fit == 1) break;
mate(*originalCh,*bufferCh);
swap(*originalCh, *bufferCh);
}
system("pause");
return 0;
}
The code will output something like this:
21, 22, 7, 1, 0 1
The first four numbers are the values of (a,b,c,d) and the fifth number is the evaluation and the last number is the fitness.
UPDATE
After taking @glampert post into consideration and reading more about what he suggested, this is an updated version of the above code.
GeneticAlgorithm.h
#include <vector>
#include <random>
//the variables in the equation, which represnts the (a,b,c,d)
const int VARIABLES = 4;
//the size of chromosoms population
const int CHROMO_SIZE = 10000;
class GA
{
public:
struct Equation
{
int equation[VARIABLES];
float evaluation;
float fit;
float probability;
};
public:
GA();
//initialize the chromosoms
void initChromos(std::vector<Equation> &original, std::vector<Equation> &temp);
//calculate the fitness of chromosoms
void calcFitness(std::vector<Equation> &orginalCh);
//select the fit ones
void selectFitness(std::vector<Equation> &orginalCh);
//copy from parent one to parent two
void copyFromP1toP2(std::vector<Equation> &parent1,
std::vector<Equation> &parent2, int size);
//mutate the chromosoms and select two chromosoms to mate
void mutate(std::vector<Equation> &parent2, int i);
void mate(std::vector<Equation> &parent1, std::vector<Equation> &parent2);
//sort chromosoms by fitness
bool static sortFitness(Equation x, Equation y);
void sortByFitness(std::vector<Equation> &orginalCh);
//swap parent one and parent two for the next generation
void swap(std::vector<Equation> *&parent1, std::vector<Equation> *&parent2);
//print the best fit of every generation
void print(std::vector<Equation> &originalCh);
//generate random number
int randGenerate(int start, int end);
public:
std::default_random_engine dRandom;
private:
float total;
};
Implementation.cpp
#include<iostream>
#include <vector>
#include <algorithm>
#include<random>
#include "GeneticAlgorithm.h"
GA::GA(): total(0),dRandom()
{
}
void GA::initChromos(std::vector<Equation> &original, std::vector<Equation> &temp)
{
for(int i = 0; i < CHROMO_SIZE; i++)
{
GA::Equation e;
for(int j = 0; j < VARIABLES; j++)
{
int randNum = randGenerate(0,40);
e.equation[j]= randNum;
e.fit = 0;
}
original.push_back(e);
}
temp.resize(CHROMO_SIZE);
}
void GA::calcFitness(std::vector<Equation> &orginalCh)
{
for(int i = 0; i < CHROMO_SIZE; i++)
{
int j = 0;
orginalCh[i].evaluation = (float)abs((orginalCh[i].equation[j] + 2 * orginalCh[i].equation[ j + 1 ]
+ 3 * orginalCh[i].equation[ j + 2 ] + 4 * orginalCh[i].equation[ j + 3 ]) - 4 );
}
}
void GA::selectFitness(std::vector<Equation> &orginalCh)
{
for(int i = 0; i < CHROMO_SIZE; i++)
{
orginalCh[i].fit = 1 / ( 1 + orginalCh[i].evaluation);
total += orginalCh[i].fit;
}
float pro = 0;
for(int i = 0; i < CHROMO_SIZE; i++)
{
pro += orginalCh[i].fit / total;
orginalCh[i].probability = pro;
}
}
void GA::copyFromP1toP2(std::vector<Equation> &p1, std::vector<Equation> &p2, int s)
{
for(int i = 0; i < s; i++)
{
for(int j = 0; j < VARIABLES; j++)
{
p2[i].equation[j] = p1[i].equation[j];
}
p2[i].fit = p1[i].fit;
}
}
void GA::mutate(std::vector<Equation> &parent2, int i)
{
int index1 = randGenerate(0, VARIABLES - 1);
int index2 = randGenerate(0, VARIABLES - 1);
int j = randGenerate(0, CHROMO_SIZE - 1);
parent2[i].equation[index1] = parent2[j].equation[index2];
}
void GA::mate(std::vector<Equation> &parent1, std::vector<Equation> &parent2)
{
int sub, p1, p2, p3, tSsize = VARIABLES;
copyFromP1toP2(parent1, parent2, CHROMO_SIZE);
for(int i = 0; i < CHROMO_SIZE; i++)
{
p1 = randGenerate(0, CHROMO_SIZE - 1);
p2 = randGenerate(0, CHROMO_SIZE - 1);;
p3 = randGenerate(0, CHROMO_SIZE - 1);
sub = randGenerate(0, VARIABLES - 1);
for(int j=sub; j<VARIABLES; j++)
{
parent2[p2].equation[j] = abs(parent1[p1].equation[j] - parent1[p3].equation[j]);
}
if(parent2[i].evaluation > 0.5f)
mutate(parent2, i);
}
}
bool GA::sortFitness(Equation x, Equation y)
{
return(x.fit > y.fit);
}
void GA::sortByFitness(std::vector<Equation> &orginalCh)
{
std::sort(orginalCh.begin(),orginalCh.end(),&GA::sortFitness);
}
void GA::swap(std::vector<GA::Equation> *&parent1, std::vector<GA::Equation> *&parent2)
{
std::vector<GA::Equation> *temp = parent1;
parent1 = parent2;
parent2 = temp;
}
void GA::print(std::vector<Equation> &originalCh)
{
for(int i=0; i<VARIABLES; i++)
{
std::cout<< originalCh[0].equation[i] << ", ";
}
std::cout << " fitness = " << originalCh[0].fit;
std::cout<< std::endl;
}
int GA::randGenerate(int start, int end)
{
std::uniform_int_distribution<int> genenNum(start, end - 1);
return genenNum(dRandom);
}
int main()
{
GA ga;
std::vector<GA::Equation> chromO, chromT;
std::vector<GA::Equation> *originalCh, *bufferCh;
ga.initChromos(chromO, chromT);
originalCh = &chromO;
bufferCh = &chromT;
for(int i=0; i<2000; i++)
{
ga.calcFitness(*originalCh);
ga.selectFitness(*originalCh);
ga.sortByFitness(*originalCh);
ga.print(*originalCh);
if((*originalCh)[0].fit == 1) break;
ga.mate(*originalCh,*bufferCh);
ga.swap(*&originalCh, *&bufferCh);
}
std::cin.get();
return 0;
}
2 Answers 2
OK, so first of all, your code is far from being C++. It is more like C using some C++ features, such as std::vector
. I'm not going to refactor all of your code into a class, instead, this is an exercise for you: Take all those loose functions and data and put them in a C++ class. This is one possible interface you can start with:
class GeneticAlgorithm
{
public:
void initChromosomes(vector<Equation> &Original, vector<Equation> &Temp);
void calcFitness(vector<Equation> &orginalCh);
void selectFit(vector<Equation> &orginalCh);
void mutate(vector<Equation> &parent2, int i);
void mate(vector<Equation> &parent1, vector<Equation> &parent2);
void print(const vector<Equation> &originalCh)
private:
// Internal helper methods and data.
// ...
};
Other style and code quality issues:
Names should not start with _
!
Names starting with an underscore _
are reserved for the C++ implementation, so they are a no-no. This is wrong:
void _sort(vector<Equation> &orginalCh)
{
sort(orginalCh.begin(), orginalCh.end(), sort_fitness);
}
I see that you've defined _sort
as a shorthand for the std::sort
algorithm. This is OK, but give the function another name. E.g.: sortByFitness()
which is also a lot more meaningful.
#define
for constants, not recommended:
Macros (AKA defines
) are an inherited C feature that has quite a few drawbacks and problems. One of the most problematic ones is that they don't respect scope. Once you declare a macro, it is visible everywhere. This makes them a very poor choice for declaring constants. C++ offers much better ways. The constants you've defined with macros should be instead defined with const int
or even better constexpr int
if your compiler has C++11 support. But also, the names you gave to the variables are not optimal. Constants are by convention named differently. One common naming convention for constants it using ALL_CAPS
. So I suggest you redeclare:
constexpr int VARIABLE = 4;
constexpr int CHROMO_SIZE = 100000;
Also note that now the constants are strongly typed (int
), which is a nice thing since the compiler can now warn you about unwanted conversions.
Using namespace std?
You have a using namespace std;
at the beginning of the file. This is generally not a good idea, since it defeats the purpose of a namespace, which is allowing identical names to coexist without clashes. I recommend that you remove any using namespace std;
occurrences and call the library functions an types by their fully qualified names, E.g.: std::vector
, std::sort
, std::copy
, etc.
rand()
:
C++11 has a brand new (and awesome!) random number library. If your compiler supports C++11, you should definitely check it out and replace the old and unreliable rand()
that you currently use.
Use of system("pause")
:
This was noted by @Edward in a comment below, I forgot to mention it in my first iteration, so thanks for the reminder:
system("pause")
is a non-portable security nightmare that will run any local program named "pause". It might just pause, or it might format your hard drive. Please use something likestd::cin.get()
instead.
Poor spacing and indentation:
Your code is poorly indented and spaced on some places, also, you are not consistent with your spacing. Take these lines for example:
orginalCh[i].ev = abs((orginalCh[i].eq[j] + 2*orginalCh[i].eq[j+1] +
3*orginalCh[i].eq[j+2] + 4*orginalCh[i].eq[j+3]) - 10);
The above are tightly packed (which I find pretty hard to read, BTW). While this is more spaced:
orginalCh[i].fit = 1 /( 1 + orginalCh[i].ev);
All the code you've posted suffers from indenting problems, I don't know if this problem occurred when you posted it here or if it is like that in your project, but the guideline stays: Be very consistent with spacing and indenting. A well formatted code will be way easier for the reader to "parse".
What is the expected behavior here?
void _copy(vector<Equation> &p1, vector<Equation> &p2, int s)
{
for(int i=0; i<s; i++)
{
for(int j=0; j<variable; j++)
p2[i].eq[j] = p1[i].eq[j];
p2[i].fit = p1[i].fit;
}
}
This function is an example of an issue I frequently see in code. The last statement line, p2[i].fit = p1[i].fit;
, is it meant to belong to last for()
? The way it is indented, it leads the reader to believe so. A more careful look will reveal that it belongs to the first loop, being executed only s
times (which is likely to be the correct). Is that the intended behavior, or did you actually mean this?
for(int i=0; i<s; i++)
{
for(int j=0; j<variable; j++)
{
p2[i].eq[j] = p1[i].eq[j];
p2[i].fit = p1[i].fit;
}
}
Or this?
for(int i=0; i<s; i++)
{
for(int j=0; j<variable; j++)
{
p2[i].eq[j] = p1[i].eq[j];
}
p2[i].fit = p1[i].fit;
}
This guideline is worth more than what most people think: Always enclose if, while, for
and other control flow statements with {}
braces, even if they are single line statements.
Miscellaneous:
Don't bunch up functions like you did here. Put a blank line between each.
bool sort_fitness(Equation x, Equation y)
{
return(x.fit > y.fit);
}
void _sort(vector<Equation> &orginalCh)
{
sort(orginalCh.begin(),orginalCh.end(),sort_fitness);
}
void swap(vector<Equation> *&parent1, vector<Equation> *&parent2)
{
vector<Equation> *temp = parent1;
parent1 = parent2;
parent2 = temp;
}
Better:
bool sort_fitness(Equation x, Equation y)
{
return(x.fit > y.fit);
}
void _sort(vector<Equation> &orginalCh)
{
sort(orginalCh.begin(),orginalCh.end(),sort_fitness);
}
void swap(vector<Equation> *&parent1, vector<Equation> *&parent2)
{
vector<Equation> *temp = parent1;
parent1 = parent2;
parent2 = temp;
}
Falls into the previous spacing/indenting tip, but:
cout<<" "<<originalCh[0].ev<<" "<<originalCh[0].fit;
In my opinion is more readable with some spacing between the <<
operators:
std::cout << " " << originalCh[0].ev << " " << originalCh[0].fit;
Mind your variable naming convention. Consistency it very important, regardless of the style you choose, once you choose one, you must be consistent with it.
You have some parameter variables names with an upper-case first letter, like Original
and Temp
in here:
void init_chrom(vector<Equation> &Original, vector<Equation> &Temp)
Whereas in here
void calc_fit(vector<Equation> &orginalCh)
You've used a lower-case letter for orginalCh
. Variable names are usually declared like orginalCh
, that is, first letter lower-case and each new word starting with an upper-case. This style is sometimes called "camel-case" or "camel-back". Another popular naming convention for function parameter variables in the "snake-case", like this: orginal_char
, Choose one and stick with it throughout your code.
-
\$\begingroup\$ Thank you very much for this. Did you run the program? if you did, do you have any command about that. \$\endgroup\$Mo Moallim– Mo Moallim2014年09月07日 18:40:53 +00:00Commented Sep 7, 2014 at 18:40
-
\$\begingroup\$ No, impossible to run. The code you've posted is missing some variables, as was commented by @Edward, above. You should edit that when you can. \$\endgroup\$glampert– glampert2014年09月07日 18:52:01 +00:00Commented Sep 7, 2014 at 18:52
-
\$\begingroup\$ Alright, here is the complete code link \$\endgroup\$Mo Moallim– Mo Moallim2014年09月07日 18:55:37 +00:00Commented Sep 7, 2014 at 18:55
-
2\$\begingroup\$ Also
system("pause")
is a non-portable security nightmare that will run any local program named "pause." It might just pause, or it might format your hard drive. Please use something likestd::cin.get()
instead. \$\endgroup\$Edward– Edward2014年09月07日 19:33:16 +00:00Commented Sep 7, 2014 at 19:33
Good job! Your code has improved a lot, congrats! But there is still room for more improvement, so I'll now give you a review on the newly edited code.
Redundant #include
s:
GeneticAlgorithm.h
is already including <vector>
and <random>
, so you don't need to include them again in the .cpp
; This only slows down the compilation.
Missing an include guard in the header file:
If you ever plan on including GeneticAlgorithm.h
on any other .cpp
file, you are going to need an include guard or a #pragma once
directive in that header file.
Move related constants into the class:
The const
s are way better than the previous #define
s, but why not place them inside the GA
class? These constants are only used inside the class, so encapsulate them in there. You will have to mark them static
.
class GA
{
private:
//the variables in the equation, which represnts the (a,b,c,d)
static const int VARIABLES = 4;
//the size of chromosoms population
static const int CHROMO_SIZE = 10000;
Make them private if they are not needed outside. A good rule of thumb is to avoid exposing implementation details and keeping interfaces short.
Use an alias or typedef
where appropriate:
These std::vector<GA::Equation>
are the GA chromosomes, I believe (I wrote some GA code a while back, but can't remember much about the data structures...). You can make your code semantically nicer by giving these vectors an alias. This adds no type safety, but can make the code a lot clearer. I like the new C++11 using
alias:
using Chromosomes = std::vector<GA::Equation>;
But if your compiler doesn't understand it, you can fallback to a "classic" typedef
:
typedef std::vector<GA::Equation> Chromosomes;
Then replace the std::vector<GA::Equation>
with Chromosomes
.
Encapsulate more?
Currently, the GA
class has very few state and you pass a lot of vectors to the class methods. Isn't it a better design to encapsulate everything inside the GA
? This could simplify the interface a lot.
Take the variables you have declared in main()
and make them GA
members. Then define a single public function the user of GA
has to call. Something similar to:
void GA::simulate(int numIterations)
{
initChromos();
for (int i = 0; i < numIterations; i++)
{
calcFitness();
selectFitness();
sortByFitness();
print();
if (originalChild[0].fit == 1)
{
break;
}
mate();
}
}
Then all main()
would have to do is:
int main()
{
GA ga;
ga.simulate(2000);
std::cin.get();
return 0;
}
Try our new C++11 lambdas!
You can use a lambda function for your sort predicate in GA::sortByFitness()
, it is a very nice new language feature and it should be used.
void GA::sortByFitness(std::vector<Equation> &orginalCh)
{
std::sort(orginalCh.begin(), orginalCh.end(),
[](const Equation & x, const Equation & y) -> bool { return (x.fit > y.fit); }
);
}
Last thing:
void GA::copyFromP1toP2(std::vector<Equation> &p1, std::vector<Equation> &p2, int s)
The s
parameter, besides being a very poor variable name, is really not necessary, since, in the call site, you are passing a constant to the function:
copyFromP1toP2(parent1, parent2, CHROMO_SIZE);
Just remove that parameter and replace it by CHROMO_SIZE
.