// -*- Mode:c++; c-basic-offset: 4; -*-

// This file is part of the Collective Variables module (Colvars).
// The original version of Colvars and its updates are located at:
// https://github.com/Colvars/colvars
// Please update all Colvars source files before making any changes.
// If you wish to distribute your changes, please submit them to the
// Colvars repository at GitHub.

#include <iostream>
#include <fstream>

#if (__cplusplus >= 201103L)
#include "colvar_neuralnetworkcompute.h"
#include "colvarparse.h"
#include "colvarproxy.h"

namespace neuralnetworkCV {
std::map<std::string, std::pair<std::function<double(double)>, std::function<double(double)>>> activation_function_map
{
    {"tanh", {[](double x){return std::tanh(x);},
              [](double x){return 1.0 - std::tanh(x) * std::tanh(x);}}},
    {"sigmoid", {[](double x){return 1.0 / (1.0 + std::exp(-x));},
                 [](double x){return std::exp(-x) / ((1.0 + std::exp(-x)) * (1.0 + std::exp(-x)));}}},
    {"linear", {[](double x){return x;},
                [](double /*x*/){return 1.0;}}},
    {"relu", {[](double x){return x < 0. ? 0. : x;},
              [](double x){return x < 0. ? 0. : 1.;}}},
    {"lrelu100", {[](double x){return x < 0. ? 0.01 * x : x;},
                  [](double x){return x < 0. ? 0.01 : 1.;}}},
    {"elu", {[](double x){return x < 0. ? std::exp(x)-1. : x;},
             [](double x){return x < 0. ? std::exp(x) : 1.;}}}
};
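
// Each entry of activation_function_map pairs an activation function with its
// analytic derivative, e.g. d/dx tanh(x) = 1 - tanh(x)^2 and
// d/dx sigmoid(x) = exp(-x) / (1 + exp(-x))^2, matching the lambdas above, so
// that a denseLayer can be given consistent (f, df) callables for the forward
// pass and for the gradient.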

#ifdef LEPTON
customActivationFunction::customActivationFunction():
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {}

customActivationFunction::customActivationFunction(const std::string& expression_string):
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {
    setExpression(expression_string);
}

customActivationFunction::customActivationFunction(const customActivationFunction& source):
    expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
    input_reference(nullptr), derivative_reference(nullptr) {
    // check if the source object is initialized
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    }
}

customActivationFunction& customActivationFunction::operator=(const customActivationFunction& source) {
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    } else {
        expression = std::string();
        value_evaluator = nullptr;
        gradient_evaluator = nullptr;
        input_reference = nullptr;
        derivative_reference = nullptr;
    }
    return *this;
}

void customActivationFunction::setExpression(const std::string& expression_string) {
    expression = expression_string;
    Lepton::ParsedExpression parsed_expression;
    // the variable must be "x" for the input of an activation function
    const std::string activation_input_variable{"x"};
    // parse the expression
    try {
        parsed_expression = Lepton::Parser::parse(expression);
    } catch (...) {
        cvm::error("Error parsing or compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // compile the expression
    try {
        value_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.createCompiledExpression()));
    } catch (...) {
        cvm::error("Error compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // create a compiled expression for the derivative
    try {
        gradient_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.differentiate(activation_input_variable).createCompiledExpression()));
    } catch (...) {
        cvm::error("Error creating compiled expression for variable \"" + activation_input_variable + "\".\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled expression
    try {
        input_reference = &(value_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled expression.\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled derivative expression
    try {
        derivative_reference = &(gradient_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled derivative expression.\n", COLVARS_INPUT_ERROR);
    }
}

std::string customActivationFunction::getExpression() const {
    return expression;
}

double customActivationFunction::evaluate(double x) const {
    *input_reference = x;
    return value_evaluator->evaluate();
}

double customActivationFunction::derivative(double x) const {
    *derivative_reference = x;
    return gradient_evaluator->evaluate();
}
#endif

denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::function<double(double)>& f, const std::function<double(double)>& df): m_activation_function(f), m_activation_function_derivative(df) {
#ifdef LEPTON
    m_use_custom_activation = false;
#endif
    readFromFile(weights_file, biases_file);
}

#ifdef LEPTON
denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::string& custom_activation_expression) {
    m_use_custom_activation = true;
    m_custom_activation_function = customActivationFunction(custom_activation_expression);
    readFromFile(weights_file, biases_file);
}
#endif

void denseLayer::readFromFile(const std::string& weights_file, const std::string& biases_file) {
    // parse weights file
    m_weights.clear();
    m_biases.clear();
    std::string line;
    colvarproxy *proxy = cvm::main()->proxy;
    auto &ifs_weights = proxy->input_stream(weights_file, "weights file");
    while (std::getline(ifs_weights, line)) {
        if (!ifs_weights) {
            throw std::runtime_error("I/O error while reading " + weights_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            std::vector<double> weights_tmp(splitted_data.size());
            for (size_t i = 0; i < splitted_data.size(); ++i) {
                try {
                    weights_tmp[i] = std::stod(splitted_data[i]);
                } catch (...) {
                    throw std::runtime_error("Cannot convert " + splitted_data[i] + " to a number while reading file " + weights_file);
                }
            }
            m_weights.push_back(weights_tmp);
        }
    }
    proxy->close_input_stream(weights_file);

    // parse biases file
    auto &ifs_biases = proxy->input_stream(biases_file, "biases file");
    while (std::getline(ifs_biases, line)) {
        if (!ifs_biases) {
            throw std::runtime_error("I/O error while reading " + biases_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            double bias = 0;
            try {
                bias = std::stod(splitted_data[0]);
            } catch (...) {
                throw std::runtime_error("Cannot convert " + splitted_data[0] + " to a number while reading file " + biases_file);
            }
            m_biases.push_back(bias);
        }
    }
    proxy->close_input_stream(biases_file);

    m_input_size = m_weights[0].size();
    m_output_size = m_weights.size();
}
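
// File layout expected by readFromFile, as implied by the parsing above: the
// weights file holds one line per output neuron with its space-separated
// weights (the number of columns determines m_input_size and the number of
// rows m_output_size), and the biases file holds one bias value per line,
// one per output neuron.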

void denseLayer::setActivationFunction(const std::function<double(double)>& f, const std::function<double(double)>& df) {
    m_activation_function = f;
    m_activation_function_derivative = df;
}

void denseLayer::compute(const std::vector<double>& input, std::vector<double>& output) const {
    for (size_t i = 0; i < m_output_size; ++i) {
        output[i] = 0;
        for (size_t j = 0; j < m_input_size; ++j) {
            output[i] += input[j] * m_weights[i][j];
        }
        output[i] += m_biases[i];
#ifdef LEPTON
        if (m_use_custom_activation) {
            output[i] = m_custom_activation_function.evaluate(output[i]);
        } else {
#endif
            output[i] = m_activation_function(output[i]);
#ifdef LEPTON
        }
#endif
    }
}

double denseLayer::computeGradientElement(const std::vector<double>& input, const size_t i, const size_t j) const {
    double sum_with_bias = 0;
    for (size_t j_in = 0; j_in < m_input_size; ++j_in) {
        sum_with_bias += input[j_in] * m_weights[i][j_in];
    }
    sum_with_bias += m_biases[i];
#ifdef LEPTON
    if (m_use_custom_activation) {
        const double grad_ij = m_custom_activation_function.derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
    } else {
#endif
        const double grad_ij = m_activation_function_derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
#ifdef LEPTON
    }
#endif
}

void denseLayer::computeGradient(const std::vector<double>& input, std::vector<std::vector<double>>& output_grad) const {
    for (size_t j = 0; j < m_input_size; ++j) {
        for (size_t i = 0; i < m_output_size; ++i) {
            output_grad[i][j] = computeGradientElement(input, i, j);
        }
    }
}
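
// For a layer computing output[i] = f(sum_j m_weights[i][j] * input[j] + m_biases[i]),
// the element returned by computeGradientElement is
// d(output[i]) / d(input[j]) = f'(sum_j m_weights[i][j] * input[j] + m_biases[i]) * m_weights[i][j],
// so computeGradient fills the full Jacobian of the layer.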

neuralNetworkCompute::neuralNetworkCompute(const std::vector<denseLayer>& dense_layers): m_dense_layers(dense_layers) {
    m_layers_output.resize(m_dense_layers.size());
    m_grads_tmp.resize(m_dense_layers.size());
    for (size_t i_layer = 0; i_layer < m_layers_output.size(); ++i_layer) {
        m_layers_output[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), 0);
        m_grads_tmp[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), std::vector<double>(m_dense_layers[i_layer].getInputSize(), 0));
    }
}

bool neuralNetworkCompute::addDenseLayer(const denseLayer& layer) {
    if (m_dense_layers.empty()) {
        // add the layer to this ANN directly if m_dense_layers is empty
        m_dense_layers.push_back(layer);
        m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
        m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
        return true;
    } else {
        // otherwise, check that the output size of the last layer in m_dense_layers matches the input size of the layer to be added
        if (m_dense_layers.back().getOutputSize() == layer.getInputSize()) {
            m_dense_layers.push_back(layer);
            m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
            m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
            return true;
        } else {
            return false;
        }
    }
}

std::vector<std::vector<double>> neuralNetworkCompute::multiply_matrix(const std::vector<std::vector<double>>& A, const std::vector<std::vector<double>>& B) {
    const size_t m = A.size();
    const size_t n = B.size();
    if (A[0].size() != n) {
        std::cerr << "Error on multiplying matrices!\n";
    }
    const size_t t = B[0].size();
    std::vector<std::vector<double>> C(m, std::vector<double>(t, 0.0));
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < t; ++j) {
            for (size_t k = 0; k < n; ++k) {
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    return C;
}

void neuralNetworkCompute::compute() {
    if (m_dense_layers.empty()) {
        return;
    }
    size_t i_layer;
    // forward pass through all layers
    m_dense_layers[0].compute(m_input, m_layers_output[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].compute(m_layers_output[i_layer - 1], m_layers_output[i_layer]);
    }
    // gradients (Jacobians) of each layer
    m_dense_layers[0].computeGradient(m_input, m_grads_tmp[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].computeGradient(m_layers_output[i_layer - 1], m_grads_tmp[i_layer]);
    }
    // chain rule: the gradient of the network output with respect to the input
    // is the product of the per-layer Jacobians, from the last layer to the first
    if (m_dense_layers.size() > 1) {
        m_chained_grad = multiply_matrix(m_grads_tmp[1], m_grads_tmp[0]);
        for (i_layer = 2; i_layer < m_dense_layers.size(); ++i_layer) {
            m_chained_grad = multiply_matrix(m_grads_tmp[i_layer], m_chained_grad);
        }
    } else {
        m_chained_grad = m_grads_tmp[0];
    }
}
} // namespace neuralnetworkCV

#endif