As a pet project I implemented a neural network. I am looking for general advice, since I am a self-taught programmer, but I also have a few specific questions, stated at the end of this post. Let's dig into it:
Code
Neuron.hpp:
//------------------------------------------------
// NEURON.HPP
//------------------------------------------------
#ifndef NEURON_HPP
#define NEURON_HPP
// Libraries
#include <vector>
#include <functional>
// My Libraries
#include "fact.hpp"
// Typedefs and Structures
class neuron;

struct origin
{
    neuron& linked_neuron;
    double w;
    origin(neuron& ln, double win) : linked_neuron(ln), w(win) {};
};

struct target
{
    neuron& linked_neuron;
    double& w;
    target(neuron& ln, double& win) : linked_neuron(ln), w(win) {};
};
// Neuron : A single neuron in the network
class neuron
{
private:
    double _act;
    double _input;
    double _delta;
    std::function<double(double)> _fact;
    std::function<double(double)> _dfact;
    std::vector<origin> _origins;
    std::vector<target> _targets;
public:
    double out;
    neuron(double (*fact)(double) = essential::fact_linear,
           double (*dfact)(double) = essential::dfact_linear )
        : _act(0), _input(0), _delta(0), _fact(fact), _dfact(dfact), out(0){};
    neuron& operator= (const neuron& rhs);
    void activate(void);
    void calculate_delta(void);
    void calculate_delta(double diff_to_example);
    void train_connection(double learning_rate);
    friend void link_neurons(neuron& predecessor, neuron& successor, double rate);
};
#endif
Neuron.cpp:
//------------------------------------------------
// NEURON.CPP
//------------------------------------------------
#include "neuron.hpp"
// Libraries
#include "config.hpp"
// CODE
using namespace std;
neuron& neuron::operator= (const neuron& rhs)
{
    if(this != &rhs)
    {
        // Drop any existing links before copying, otherwise assignment appends duplicates.
        _origins.clear();
        _targets.clear();
        for (auto& rhso: rhs._origins)
        {
            _origins.push_back(origin(rhso.linked_neuron, rhso.w) );
        }
        for (auto& rhst: rhs._targets)
        {
            _targets.push_back(target(rhst.linked_neuron, rhst.w) );
        }
        _act = rhs._act;
        _input = rhs._input;
        _delta = rhs._delta;
        _fact = rhs._fact;
        _dfact = rhs._dfact;
        out = rhs.out;
    }
    return *this;
}
void neuron::activate()
{
    _input = 0;
    for (auto const& origin: _origins)
    {
        _input += origin.linked_neuron.out * origin.w;
    }
    _act = _fact(_input);
    out = _act;
}

void neuron::calculate_delta()
{
    _delta = 0;
    for (auto const& target: _targets)
    {
        _delta += target.linked_neuron._delta * target.w;
    }
    _delta *= _dfact(_input);
}

void neuron::calculate_delta(double diff_to_example)
{
    _delta = _dfact(_input) * diff_to_example;
}

void neuron::train_connection(double learning_rate)
{
    for (auto& origin: _origins)
    {
        origin.w += learning_rate * origin.linked_neuron.out * _delta;
    }
}

void link_neurons(neuron& predecessor, neuron& successor, double rate)
{
    successor._origins.push_back(origin(predecessor, rate) );
    predecessor._targets.push_back(target(successor, successor._origins.back().w) );
}
(I know, _act is without purpose right now. See below.)
Network.hpp (updated; old network.hpp):
//------------------------------------------------
// NETWORK.HPP
//------------------------------------------------
#ifndef NETWORK_HPP
#define NETWORK_HPP
// Libraries
#include <vector>
// My Libraries
#include "neuron.hpp"
#include "rne.hpp"
#include "defs.hpp"
// Typedefs and Structures
enum brain_param { FEED_FORWARD, FULL_FORWARD };
enum time_param { OFFLINE, ONLINE };
enum method_param { BACKPROP, RPROP };
struct learning_param
{
    time_param timing;
    method_param method;
    learning_param(time_param t_in, method_param m_in) : timing(t_in), method(m_in) {}
};
// Network : Controls the dynamic of the neural network
class network
{
private:
    std::vector<unsigned int> _network_structure;
    std::vector<std::vector<neuron>> _neurons;
    neuron _bias;
    rne _random_engine;
    void _initialize_random_rates();
    void _reset_netoutputs();
    void _calculate_netoutputs(std::vector<double> input);
    void _train_network(std::vector<double> errors, double learning_rate, double layer_fact);
public:
    network(std::vector<unsigned int> neurons_per_layer, rne &random_engine);
    double train_epoch(testcases tests, double learning_rate = 0.01, double layer_fact = 1.2);
    //testcases from defs.hpp
    std::vector<double> solve(std::vector<double> input);
    brain_param brain_structure;
    learning_param learning_strategy;
    bool BIAS_NEURON;
};
#endif
Network.cpp (updated; old network.cpp):
//------------------------------------------------
// NETWORK.CPP
//------------------------------------------------
#include "network.hpp"
// Libraries
#include <iostream>
#include <algorithm> //std::shuffle
#include "config.hpp"
#include "errfct.hpp"
// CODE
using namespace std;
// Constructors
network::network(vector<unsigned int> neurons_per_layer, rne &random_engine) :
    _network_structure(neurons_per_layer), _neurons(neurons_per_layer.size() ),
    _random_engine(random_engine), brain_structure(FEED_FORWARD),
    learning_strategy(OFFLINE, BACKPROP), BIAS_NEURON(true)
{
    for (unsigned int i = 0; i < neurons_per_layer.size() - 1; ++i)
    {
        _neurons[i].resize(neurons_per_layer[i], neuron(essential::fact_tanh, essential::dfact_tanh) );
    }
    _neurons.back().resize(neurons_per_layer.back(), neuron(essential::fact_linear, essential::dfact_linear) );
    _initialize_random_rates();
}
// Methods
void network::_initialize_random_rates()
{
    double rate;
    for (vector<vector<neuron>>::iterator layer = _neurons.begin() + 1;
         layer != _neurons.end(); ++layer)
    {
        switch (brain_structure)
        {
        case FEED_FORWARD:
        {
            vector<vector<neuron>>::iterator prev_layer = layer - 1;
            for (auto& successors: *layer)
            {
                for (auto& predecessors: *prev_layer)
                {
                    rate = _random_engine.splitrange(0.05,0.5);
                    link_neurons(predecessors, successors, rate);
                }
                // Link every neuron to the bias neuron. If BIAS_NEURON == false
                // the output of _bias is set to 0, so that the link becomes unimportant.
                // This way the _bias neuron can be switched on and off.
                rate = _random_engine.splitrange(0.05,0.5);
                link_neurons(_bias, successors, rate);
            }
            break;
        } // END FEED_FORWARD
        case FULL_FORWARD:
        {
            for (vector<vector<neuron>>::iterator prev_layers = _neurons.begin();
                 prev_layers != layer; ++prev_layers)
            {
                for (auto& successors: *layer)
                {
                    for (auto& predecessors: *prev_layers)
                    {
                        rate = _random_engine.splitrange(0.05,0.5);
                        link_neurons(predecessors, successors, rate);
                    }
                    // Link every neuron to the bias neuron. If BIAS_NEURON == false
                    // the output of _bias is set to 0, so that the link becomes unimportant.
                    // This way the _bias neuron can be switched on and off.
                    rate = _random_engine.splitrange(0.05,0.5);
                    link_neurons(_bias, successors, rate);
                }
            }
            break;
        } // END FULL_FORWARD
        default:
        {
        }
        } // END switch
    }
}
void network::_reset_netoutputs()
{
    for (auto& layer: _neurons)
    {
        for (auto& neuron: layer)
        {
            neuron.out = 0;
        }
    }
}

void network::_calculate_netoutputs(vector<double> input)
{
    unsigned input_size = input.size();
    if (input_size != _network_structure.front() )
    {
        //Throw an error --- will be implemented
    }
    for (unsigned i = 0; i < input_size; ++i)
    {
        _neurons.front()[i].out = input[i];
    }
    if (BIAS_NEURON)
    {
        // The bias neuron is an additional neuron of the input layer
        // and always sends a constant signal.
        _bias.out = 1;
    }
    else
    {
        _bias.out = 0;
    }
    for (vector<vector<neuron>>::iterator layer = ++_neurons.begin(); layer != _neurons.end(); ++layer)
    {
        for (auto& neuron: *layer)
        {
            neuron.activate();
        }
    }
}
void network::_train_network(vector<double> errors, double learning_rate, double layer_fact)
{
    // Train the output neurons
    for (unsigned i = 0; i < _network_structure.back(); ++i)
    {
        _neurons.back()[i].calculate_delta(errors[i]);
        _neurons.back()[i].train_connection(learning_rate);
    }
    // Train all but the input/output neurons
    for (vector<vector<neuron>>::reverse_iterator rlayer = ++_neurons.rbegin();
         rlayer != --_neurons.rend(); ++rlayer)
    {
        learning_rate *= layer_fact;
        for (auto& neuron: *rlayer)
        {
            neuron.calculate_delta();
            neuron.train_connection(learning_rate);
        }
    }
}
double network::train_epoch(testcases tests, double learning_rate, double layer_fact)
{
    vector<vector<double>> errors;
    switch (learning_strategy.timing)
    {
    case OFFLINE:
    {
        for (auto& test: tests)
        {
            if (test[0].size() != _network_structure.front() || test[1].size() != _network_structure.back() )
            {
                //Throw an error
            }
            _reset_netoutputs();
            _calculate_netoutputs(test.front() );
            vector<double> tmp_error(_network_structure.back() );
            for (unsigned j = 0; j < tmp_error.size(); ++j)
            {
                tmp_error[j] = test.back()[j] - _neurons.back()[j].out;
            }
            errors.push_back(tmp_error);
        }
        vector<double> avg_error(_network_structure.back(), 0);
        for (unsigned i = 0; i < _network_structure.back(); ++i)
        {
            for (auto const& error: errors)
            {
                avg_error[i] += error[i];
            }
            avg_error[i] /= (double)errors.size();
        }
        _train_network(avg_error, learning_rate, layer_fact);
        break;
    } // END OFFLINE
    case ONLINE:
    {
        std::shuffle(tests.begin(), tests.end(), _random_engine.get_engine() );
        for (auto& test: tests)
        {
            if (test[0].size() != _network_structure.front() || test[1].size() != _network_structure.back() )
            {
                //Throw an error
            }
            _reset_netoutputs();
            _calculate_netoutputs(test.front() );
            vector<double> tmp_error(_network_structure.back() );
            for (unsigned j = 0; j < tmp_error.size(); ++j)
            {
                tmp_error[j] = test.back()[j] - _neurons.back()[j].out;
            }
            errors.push_back(tmp_error);
            _train_network(errors.back(), learning_rate, layer_fact);
        }
        break;
    } // END ONLINE
    default:
    {
    }
    }
    vector<double> specific_errors;
    for (auto const& error: errors)
    {
        specific_errors.push_back(_errfct(error) );
    }
    double totalerror = 0;
    for (auto& specific_error: specific_errors)
    {
        totalerror += specific_error;
    }
    return totalerror;
}
vector<double> network::solve(vector<double> input)
{
    _reset_netoutputs();
    _calculate_netoutputs(input);
    vector<double> solution;
    for (auto const& output_neuron: _neurons.back() )
    {
        solution.push_back(output_neuron.out);
    }
    return solution;
}
These are the two interesting classes, but there is more code around them. I hesitate to post it all here, in order to keep everything compact, so I will only provide links for now; if needed I will edit my question.
- network repository (The source I posted here is from commit d37db4b / 09.11)
EDIT: the old source files
fact.hpp (old)
fact.cpp (old)
errfct.hpp (old)
errfct.cpp (old)
rne.hpp (old)
rne.cpp (old)
defs.hpp (old)
config.hpp (old)
main.cpp (old)
Personal Statement
So what went on in my mind during coding? Well, all in all, I planned to keep the code expandable; I want to implement more features, and therefore everything is currently more split up than it needs to be (or so I think). That's why there is fact.*pp and a neuron constructor that takes pointers to functions as arguments, even though I currently don't use this feature. There is a lot of stuff I still want to do, like proper error handling. But before I go on, I wanted to have everything up to now reviewed.
Now, what I don't like from a design/logical perspective is that the neurons have too much knowledge about the network: the neurons know about their successors and predecessors. They know the rates between them, and they are even responsible for changing them. These are actually all tasks I originally intended to put in the network class. In fact, I had the code written that way. But then the implementation of all the functions, like determining the deltas and changing the rates, was a horrible index war, with many hard-to-read nested loops. This way the code got much cleaner (and probably easier to maintain, yet maybe there is an even better way from my "logical" standpoint).
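Roughly, the rejected index-based version would have looked something like this (a reconstructed sketch for illustration, not the actual old code):

#include <cstddef>
#include <vector>

// weights[l][i][j] = weight from neuron j in layer l to neuron i in layer l + 1;
// layer 0 is the input layer, layer sizes.size() - 1 is the output layer.
void backpropagate_deltas(const std::vector<std::size_t>& sizes,
                          const std::vector<std::vector<std::vector<double>>>& weights,
                          const std::vector<std::vector<double>>& net_input,
                          std::vector<std::vector<double>>& delta,
                          double (*dfact)(double))
{
    for (std::size_t l = sizes.size() - 2; l >= 1; --l)     // hidden layers, back to front
    {
        for (std::size_t j = 0; j < sizes[l]; ++j)
        {
            double sum = 0;
            for (std::size_t i = 0; i < sizes[l + 1]; ++i)
                sum += delta[l + 1][i] * weights[l][i][j];  // weighted deltas from the layer above
            delta[l][j] = sum * dfact(net_input[l][j]);     // chain rule
        }
    }
}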
Another thing I found cool at the start and then started to hate is the conditional compilation. A few years ago I worked on a big software project (expressomd) and we used this a lot. I actually like the concept of choosing at compile time which features I want to use, but in its current form the code is just too cluttered, and it won't get better with more conditional statements. I thought about removing these parts for the review, but I am looking for constructive feedback and I'm looking forward to any suggestions concerning this! Note: I thought about putting each different "feature" in its own file, but this is not possible since features influence each other, and the number of files would grow too fast... I really have no clue how to address this.
EDIT: I moved a lot of the compile-time options into enums. I am still pretty sure this isn't the optimal solution, yet I think it is better than before.
Summary
What general advice do you have for a novice programmer?
Where do I deviate from best practice? Where is my code hard to read or illogical?
Are my overall design and ideas good? How do I better plan such projects?
(old question, see edit) Can I avoid a bunch of nested conditional compilation steps and still have control over the features of my code at compile time?
I currently rely on enums to choose at run time which features of the code I want. This works better for me than the previous approach, but how can I further improve on this? It still feels a little clunky.
Thanks in advance, I'm really looking forward to every answer :)
EDIT
The biggest thing I changed is the conditional compilation flags: I removed them. They just felt worse and worse. I now use enums and switch statements to decide at run time which parts of the code I want. Maybe I'll come back to some compile-time features later. Additionally, I changed learning_rate and layer_fact (the factor by which the learning rate grows with each layer) from public variables to function arguments. To keep the code compatible (and because I like default arguments) I assigned default values to them. Quick question: are numerical default arguments magic numbers?
1 Answer
It's a long question, so I'm sure many others will find something else to say. Here are just a few thoughts about your code.
You're using include guards (#ifndef) but I'm more comfortable with #pragma once. Yes, it's not standard, but it's supported by all major compilers and it's less error prone (also slightly faster, but that's nothing we may/should care about).
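For comparison, the two styles side by side:

// Classic include guard: standard, but needs a project-wide unique macro name.
#ifndef NEURON_HPP
#define NEURON_HPP
// ... declarations ...
#endif

// Non-standard but widely supported; nothing to keep unique by hand.
#pragma once
// ... declarations ...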
In origin, target and neuron you have public fields. I think you seldom need a public uncontrolled field in your C++ code (a set*() method may be a good beginning, but more descriptive names like reset() are even better).
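A minimal sketch of what that could look like for neuron's public out field (the names are just suggestions):

class neuron
{
public:
    double out() const { return out_; } // read access stays cheap
    void reset() { out_ = 0; }          // explicit intent instead of a bare assignment
private:
    double out_ = 0;                    // no longer publicly writable
};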
You have many references to primitive types (for example double&). I do not know what the pointer size is on your architecture, but if it's bigger than sizeof(double) then you're probably wasting memory, and in any case you may prevent some compiler optimizations (because of pointer aliasing; the standard does not mandate how references must be implemented). Yes, you may use const, but it's so weak that AFAIK it's happily ignored by any optimizer.
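If you want to get rid of the double& member entirely, one hypothetical alternative is to store the owning neuron plus an index instead (origin_weight is a made-up accessor, not code from the repository):

#include <cstddef>

struct target
{
    neuron& linked_neuron;    // the successor that owns the weight in its _origins
    std::size_t origin_index; // position of that weight inside linked_neuron's _origins

    target(neuron& ln, std::size_t idx) : linked_neuron(ln), origin_index(idx) {}
    // The weight itself would then be read through an accessor on neuron,
    // e.g. linked_neuron.origin_weight(origin_index).
};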
Why is the neuron constructor declared with double (*fact)(double) instead of std::function<double(double)>? It may even be a const& (but I think that would have worse performance and more complicated interactions with lambdas).
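A sketch of that signature, reusing the names from the question:

#include <functional>
#include "fact.hpp" // for essential::fact_linear / essential::dfact_linear

class neuron
{
public:
    // std::function accepts plain function pointers, lambdas and functors alike.
    explicit neuron(std::function<double(double)> fact = essential::fact_linear,
                    std::function<double(double)> dfact = essential::dfact_linear)
        : _fact(std::move(fact)), _dfact(std::move(dfact)) {}

private:
    std::function<double(double)> _fact;
    std::function<double(double)> _dfact;
};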
neuron has an overloaded assignment operator, but I do not see any copy constructor (and what about a move constructor?). You even have a std::vector<neuron>. Many words have been printed (elsewhere) about this.
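Following the rule of five, the full set of special members would be declared like this (just a sketch; what copying should mean for the reference members in origin and target is up to you):

class neuron
{
public:
    neuron(const neuron& other);              // copy constructor
    neuron(neuron&& other) noexcept;          // move constructor
    neuron& operator=(const neuron& rhs);     // copy assignment (already present)
    neuron& operator=(neuron&& rhs) noexcept; // move assignment
    ~neuron() = default;                      // destructor
};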
Do not ever declare using namespace std;.
std::vector<std::vector<neuron>> seems a little bit too much to me; maybe it's time to use a specific type for that? Boost has a multidimensional array; alternatively you may use template<typename T> using multivector = std::vector<std::vector<T>>; (or something similar...)
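For example:

#include <vector>

// Alias for the nested container, so declarations stay readable:
template <typename T>
using multivector = std::vector<std::vector<T>>;

// In network.hpp:
multivector<neuron> _neurons;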
unsigned int isn't better than simply unsigned.
When you pass around a std::vector<double> you may want to make it a const&. The compiler may (at least for small programs and simple code) be smart enough, but it's also about communicating intent.
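For example:

// Pass read-only vectors by const reference; no copy, and the intent is explicit:
void _calculate_netoutputs(const std::vector<double>& input);
std::vector<double> solve(const std::vector<double>& input);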
network::_initialize_random_rates() is too complex, partly because you didn't split it into multiple functions and partly because you're using an enum. Why don't you use enum class? Better: you already have delegates for many things, so why don't you also use a class for this? Delegating will simplify your code, distribute responsibilities, and make it easier to extend. The same is true for network::train_epoch().
EDIT: what would this suggested refactoring look like? You already delegate some features to an external entity (rne, for example). Extend the same concept to replace the enums: in my opinion they're invaluable (better if using enum class) for small tasks where they slightly change method behavior, but they start to smell when used to switch between big blocks of code (in multiple places). In your case it may look like this (proof of concept):
class neural_network_type {
public:
    virtual ~neural_network_type() = default; // polymorphic base: virtual destructor
    virtual void initialize(network& net) = 0;
};
You will then have a feedforward_network_type. Note that you may have a parameter/property to specify the concrete neural_network_type class (a direct replacement of the enum), or it may be a template argument:
template <typename TType>
class network {
private:
    TType brainStructure;
};
Note that I removed the publicly accessible field. Even if you opt for the other solution, does it make sense to have it as a ctor argument? Will you ever change its value again? If the ctor starts to have too many parameters, you may want to introduce a description struct with all the required arguments, something like this:
struct network_description {
    neural_network_type& type; // a reference, because neural_network_type is abstract
    vector<unsigned int> neurons_per_layer;
    rne random_engine;
};
With this ctor (but you may also want to use a factory method):
network::network(const network_description& description) {
}
The very next step is to have different derived network classes, one for each type you want to support, and a factory method:
class network {
public:
    static network* create(const network_description& description) {
        // ...
    }
protected:
    virtual void initialize_random_rates() = 0;
};

class feedforward_network : public network {
    // ...
};
One last thing that I didn't note at first: be careful with identifiers that start with an underscore. Names beginning with an underscore are reserved in the global namespace, and names beginning with an underscore followed by a capital letter (or containing a double underscore) are reserved everywhere. Simply do not prefix private members and methods at all.
About conditional compilation: it may be used or not (as you said, to include some features in the compiled code), but note that:
- The compiler is smart enough to remove unused code (so you won't gain anything but faster compilation.)
- If you're building a library, then you force your users to recompile the library or to keep multiple versions of it.
- Code in header files won't be compiled at all if it's not included.
I usually use conditional compilation only in the following cases (but of course there isn't a rule):
- I want to include extra code for debugging/tracing purposes (for example #ifdef DEBUG_ALLOCATION to include verbose logging about memory allocation.)
- I want to include extra code during development/debugging (for example additional asserts and run-time checks.)
- Code to compile is different for different architectures/compilers (check any portable library...)
- Some code in my library is experimental and users must explicitly ask for it (something like #ifdef MYLIB_EXPERIMENTAL_FEATURES), and it can't simply be moved into separate headers (which is what the C++ standard library does.)
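For instance, the debugging/tracing case could look like this (tracked_alloc is a made-up example function):

#include <cstdio>
#include <cstdlib>

void* tracked_alloc(std::size_t bytes)
{
    void* p = std::malloc(bytes);
#ifdef DEBUG_ALLOCATION
    // Compiled in only when DEBUG_ALLOCATION is defined (e.g. -DDEBUG_ALLOCATION).
    std::fprintf(stderr, "alloc %zu bytes -> %p\n", bytes, p);
#endif
    return p;
}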
- "First of all, thanks for the answer. Before accepting, I want to wait the full week. Secondly, can you please elaborate on the part with the delegates in a little more detail? I am not sure how I should tackle this." (manthano, Nov 13, 2016)
- "@manthano yes, of course. I highlighted just a few minor things that came to my mind; I still think someone else will post a much better answer." (Adriano Repetti, Nov 14, 2016)