// -*- Mode:c++; c-basic-offset: 4; -*-

// This file is part of the Collective Variables module (Colvars).
// The original version of Colvars and its updates are located at:
// https://github.com/Colvars/colvars
// Please update all Colvars source files before making any changes.
// If you wish to distribute your changes, please submit them to the
// Colvars repository at GitHub.

#include <iostream>
#include <fstream>

#if (__cplusplus >= 201103L)
#include "colvar_neuralnetworkcompute.h"
#include "colvarparse.h"
#include "colvarproxy.h"

namespace neuralnetworkCV {
std::map<std::string, std::pair<std::function<double(double)>, std::function<double(double)>>> activation_function_map
{
    {"tanh", {[](double x){return std::tanh(x);},
              [](double x){return 1.0 - std::tanh(x) * std::tanh(x);}}},
    {"sigmoid", {[](double x){return 1.0 / (1.0 + std::exp(-x));},
                 [](double x){return std::exp(-x) / ((1.0 + std::exp(-x)) * (1.0 + std::exp(-x)));}}},
    {"linear", {[](double x){return x;},
                [](double /*x*/){return 1.0;}}},
    {"relu", {[](double x){return x < 0. ? 0. : x;},
              [](double x){return x < 0. ? 0. : 1.;}}},
    {"lrelu100", {[](double x){return x < 0. ? 0.01 * x : x;},
                  [](double x){return x < 0. ? 0.01 : 1.;}}},
    {"elu", {[](double x){return x < 0. ? std::exp(x)-1. : x;},
             [](double x){return x < 0. ? std::exp(x) : 1.;}}}
};
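
// Each entry of activation_function_map pairs an activation function with its
// analytic derivative, e.g. d/dx tanh(x) = 1 - tanh(x)^2 and
// d/dx sigmoid(x) = exp(-x) / (1 + exp(-x))^2, matching the lambdas above, so
// that a denseLayer can be given consistent (f, df) callables for the forward
// pass and for the gradient.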

#ifdef LEPTON
customActivationFunction::customActivationFunction():
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {}

customActivationFunction::customActivationFunction(const std::string& expression_string):
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {
    setExpression(expression_string);
}

customActivationFunction::customActivationFunction(const customActivationFunction& source):
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {
    // check if the source object is initialized
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    }
}

customActivationFunction& customActivationFunction::operator=(const customActivationFunction& source) {
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    } else {
        expression = std::string();
        value_evaluator = nullptr;
        gradient_evaluator = nullptr;
        input_reference = nullptr;
        derivative_reference = nullptr;
    }
    return *this;
}

void customActivationFunction::setExpression(const std::string& expression_string) {
    expression = expression_string;
    Lepton::ParsedExpression parsed_expression;
    // the variable must be "x" for the input of an activation function
    const std::string activation_input_variable{"x"};
    // parse the expression
    try {
        parsed_expression = Lepton::Parser::parse(expression);
    } catch (...) {
        cvm::error("Error parsing or compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // compile the expression
    try {
        value_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.createCompiledExpression()));
    } catch (...) {
        cvm::error("Error compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // create a compiled expression for the derivative
    try {
        gradient_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.differentiate(activation_input_variable).createCompiledExpression()));
    } catch (...) {
        cvm::error("Error creating compiled expression for variable \"" + activation_input_variable + "\".\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled expression
    try {
        input_reference = &(value_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled expression.\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled derivative expression
    try {
        derivative_reference = &(gradient_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled derivative expression.\n", COLVARS_INPUT_ERROR);
    }
}

std::string customActivationFunction::getExpression() const {
    return expression;
}

double customActivationFunction::evaluate(double x) const {
    *input_reference = x;
    return value_evaluator->evaluate();
}

double customActivationFunction::derivative(double x) const {
    *derivative_reference = x;
    return gradient_evaluator->evaluate();
}
#endif

denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::function<double(double)>& f, const std::function<double(double)>& df): m_activation_function(f), m_activation_function_derivative(df) {
#ifdef LEPTON
    m_use_custom_activation = false;
#endif
    readFromFile(weights_file, biases_file);
}

#ifdef LEPTON
denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::string& custom_activation_expression) {
    m_use_custom_activation = true;
    m_custom_activation_function = customActivationFunction(custom_activation_expression);
    readFromFile(weights_file, biases_file);
}
#endif

void denseLayer::readFromFile(const std::string& weights_file, const std::string& biases_file) {
    // parse weights file
    m_weights.clear();
    m_biases.clear();
    std::string line;
    colvarproxy *proxy = cvm::main()->proxy;
    auto &ifs_weights = proxy->input_stream(weights_file, "weights file");
    while (std::getline(ifs_weights, line)) {
        if (!ifs_weights) {
            throw std::runtime_error("I/O error while reading " + weights_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            std::vector<double> weights_tmp(splitted_data.size());
            for (size_t i = 0; i < splitted_data.size(); ++i) {
                try {
                    weights_tmp[i] = std::stod(splitted_data[i]);
                } catch (...) {
                    throw std::runtime_error("Cannot convert " + splitted_data[i] + " to a number while reading file " + weights_file);
                }
            }
            m_weights.push_back(weights_tmp);
        }
    }
    proxy->close_input_stream(weights_file);

    // parse biases file
    auto &ifs_biases = proxy->input_stream(biases_file, "biases file");
    while (std::getline(ifs_biases, line)) {
        if (!ifs_biases) {
            throw std::runtime_error("I/O error while reading " + biases_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            double bias = 0;
            try {
                bias = std::stod(splitted_data[0]);
            } catch (...) {
                throw std::runtime_error("Cannot convert " + splitted_data[0] + " to a number while reading file " + biases_file);
            }
            m_biases.push_back(bias);
        }
    }
    proxy->close_input_stream(biases_file);

    m_input_size = m_weights[0].size();
    m_output_size = m_weights.size();
}
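
// File layout expected by readFromFile, as implied by the parsing above: the
// weights file holds one line per output neuron with its space-separated
// weights (the number of columns determines m_input_size and the number of
// rows m_output_size), and the biases file holds one bias value per line,
// one per output neuron.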

void denseLayer::setActivationFunction(const std::function<double(double)>& f, const std::function<double(double)>& df) {
    m_activation_function = f;
    m_activation_function_derivative = df;
}

void denseLayer::compute(const std::vector<double>& input, std::vector<double>& output) const {
    for (size_t i = 0; i < m_output_size; ++i) {
        output[i] = 0;
        for (size_t j = 0; j < m_input_size; ++j) {
            output[i] += input[j] * m_weights[i][j];
        }
        output[i] += m_biases[i];
#ifdef LEPTON
        if (m_use_custom_activation) {
            output[i] = m_custom_activation_function.evaluate(output[i]);
        } else {
#endif
            output[i] = m_activation_function(output[i]);
#ifdef LEPTON
        }
#endif
    }
}

double denseLayer::computeGradientElement(const std::vector<double>& input, const size_t i, const size_t j) const {
    double sum_with_bias = 0;
    for (size_t j_in = 0; j_in < m_input_size; ++j_in) {
        sum_with_bias += input[j_in] * m_weights[i][j_in];
    }
    sum_with_bias += m_biases[i];
#ifdef LEPTON
    if (m_use_custom_activation) {
        const double grad_ij = m_custom_activation_function.derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
    } else {
#endif
        const double grad_ij = m_activation_function_derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
#ifdef LEPTON
    }
#endif
}

void denseLayer::computeGradient(const std::vector<double>& input, std::vector<std::vector<double>>& output_grad) const {
    for (size_t j = 0; j < m_input_size; ++j) {
        for (size_t i = 0; i < m_output_size; ++i) {
            output_grad[i][j] = computeGradientElement(input, i, j);
        }
    }
}
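
// For a layer computing output[i] = f(sum_j m_weights[i][j] * input[j] + m_biases[i]),
// the element returned by computeGradientElement is
// d(output[i]) / d(input[j]) = f'(sum_j m_weights[i][j] * input[j] + m_biases[i]) * m_weights[i][j],
// so computeGradient fills the full Jacobian of the layer.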

neuralNetworkCompute::neuralNetworkCompute(const std::vector<denseLayer>& dense_layers): m_dense_layers(dense_layers) {
    m_layers_output.resize(m_dense_layers.size());
    m_grads_tmp.resize(m_dense_layers.size());
    for (size_t i_layer = 0; i_layer < m_layers_output.size(); ++i_layer) {
        m_layers_output[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), 0);
        m_grads_tmp[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), std::vector<double>(m_dense_layers[i_layer].getInputSize(), 0));
    }
}

bool neuralNetworkCompute::addDenseLayer(const denseLayer& layer) {
    if (m_dense_layers.empty()) {
        // add the layer to this ANN directly if m_dense_layers is empty
        m_dense_layers.push_back(layer);
        m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
        m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
        return true;
    } else {
        // otherwise, check that the output size of the last layer in m_dense_layers matches the input size of the layer to be added
        if (m_dense_layers.back().getOutputSize() == layer.getInputSize()) {
            m_dense_layers.push_back(layer);
            m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
            m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
            return true;
        } else {
            return false;
        }
    }
}

std::vector<std::vector<double>> neuralNetworkCompute::multiply_matrix(const std::vector<std::vector<double>>& A, const std::vector<std::vector<double>>& B) {
    const size_t m = A.size();
    const size_t n = B.size();
    if (A[0].size() != n) {
        std::cerr << "Error on multiplying matrices!\n";
    }
    const size_t t = B[0].size();
    std::vector<std::vector<double>> C(m, std::vector<double>(t, 0.0));
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < t; ++j) {
            for (size_t k = 0; k < n; ++k) {
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    return C;
}

void neuralNetworkCompute::compute() {
    if (m_dense_layers.empty()) {
        return;
    }
    size_t i_layer;
    // forward pass through all layers
    m_dense_layers[0].compute(m_input, m_layers_output[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].compute(m_layers_output[i_layer - 1], m_layers_output[i_layer]);
    }
    // gradients (Jacobians) of each layer
    m_dense_layers[0].computeGradient(m_input, m_grads_tmp[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].computeGradient(m_layers_output[i_layer - 1], m_grads_tmp[i_layer]);
    }
    // chain rule: the gradient of the network output with respect to the input
    // is the product of the per-layer Jacobians, from the last layer to the first
    if (m_dense_layers.size() > 1) {
        m_chained_grad = multiply_matrix(m_grads_tmp[1], m_grads_tmp[0]);
        for (i_layer = 2; i_layer < m_dense_layers.size(); ++i_layer) {
            m_chained_grad = multiply_matrix(m_grads_tmp[i_layer], m_chained_grad);
        }
    } else {
        m_chained_grad = m_grads_tmp[0];
    }
}
} // namespace neuralnetworkCV

#endif