1

EDIT: As suggested by others I post the whole code which can be run the only think you would need is to have Eigen installed.

This is the main header with three classes, in summary ADNodeInstance is a data holder, ADNode is encapsulating a pointer to ADNodeInstance, and ADGraphBuilder is a main class which holds all the ADNodeInstances and manages them.:

//
// Created by alex on 07/05/15.
//
#ifndef ADVIBE_STACK_H
#define ADVIBE_STACK_H
#include "memory"
#include "iostream"
#include "fstream"
#include "Eigen/Dense"
typedef Eigen::MatrixXd Matrix;
enum class OPERATOR {
 NONE,
 ADD,
 UNNARY_NEGATION,
 MULTIPLY,
 DOT_MULTIPLY, // Linear algebra
 VERTICAL_CONCATENATION,
 TRANSPOSE,
 SUBINDEX,
 TANH
};
enum TYPE{
 CONSTANT,
 INPUT,
 INPUT_DERIVED,
 GRADIENT
};
/**
 * Main graph classes
 */
class ADNodeInstance {
public:
 TYPE type;
 int id;
 OPERATOR op;
 int dim1, dim2;
 std::string name;
 Matrix value;
 std::vector<std::weak_ptr<ADNodeInstance>> parents;
 std::vector<std::weak_ptr<ADNodeInstance>> children;
 ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2, std::string name) :
 type(type), id(id), op(op), dim1(dim1), dim2(dim2), name(name) { };
 ADNodeInstance(TYPE type, int id, OPERATOR op, const Eigen::Ref<const Matrix> &value) :
 type(type), id(id), op(op), dim1(value.rows()), dim2(value.cols()), value(value) { };
 ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2) :
 type(type), id(id), op(op), dim1(dim1), dim2(dim2) { }
 ~ADNodeInstance() { };
};
class ADGraphBuilder;
class ADNode{
public:
 ADGraphBuilder* builder;
 std::shared_ptr<ADNodeInstance> pointer;
 ADNode(){};
 ADNode(ADGraphBuilder* builder):
 builder(builder){};
 ADNode(const ADNode& node):
 builder(node.builder), pointer(node.pointer){};
 ~ADNode() {};
 inline ADNode getVariable(OPERATOR op, const int dim1, const int dim2) const;
 inline ADNode apply(OPERATOR op) const{
 ADNode result = getVariable(op, this->pointer->dim1, this->pointer->dim2);
 result.pointer->parents.push_back(pointer);
 pointer->children.push_back(result.pointer);
 return result;
 };
 inline ADNode tanh() const{
 return apply(OPERATOR::TANH);
 }
 inline ADNode transpose() const{
 ADNode result = getVariable(OPERATOR::TRANSPOSE, this->pointer->dim2, this->pointer->dim1);
 result.pointer->parents.push_back(pointer);
 pointer->children.push_back(result.pointer);
 return result;
 }
 inline ADNode operator-() const{
 std::cout << pointer->id << " U " << std::endl;
 std::cout << "NEG: " << builder << " - " << pointer->id << std::endl;
 ADNode node = apply(OPERATOR::UNNARY_NEGATION);
 std::cout << "NEG2: " << builder << " - " << pointer->id << std::endl;
 return node;
 };
 inline ADNode block(const int loc1, const int loc2, const int dim1, const int dim2) const {
 ADNode result = getVariable(OPERATOR::SUBINDEX, dim1, dim2);
 result.pointer->parents.push_back(pointer);
 pointer->children.push_back(result.pointer);
 return result;
 }
};
class ADGraphBuilder{
protected:
 int counter;
public:
 std::vector<ADNode> nodes;
 ADGraphBuilder():
 counter(0){};
 ~ADGraphBuilder(){};
 std::vector<ADNode> gradient(const ADNode& target, const ADNode& param);
 ADNode createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient);
 inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2) {
 ADNode result = ADNode(this);
 result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2);
 nodes.push_back(result);
 return result;
 }
 inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2, const std::string name) {
 ADNode result = ADNode(this);
 result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2, name);
 nodes.push_back(result);
 return result;
 }
 inline ADNode createConstantVariable(const Eigen::Ref<const Matrix>& value){
 ADNode result = ADNode(this);
 result.pointer = std::make_shared<ADNodeInstance>(TYPE::CONSTANT, counter++, OPERATOR::NONE , value);
 nodes.push_back(result);
 return result;
 }
 inline ADNode createVariable(const int dim1, const int dim2){
 ADNode result = ADNode(this);
 result.pointer = std::make_shared<ADNodeInstance>(TYPE::INPUT, counter++, OPERATOR::NONE , dim1, dim2);
 nodes.push_back(result);
 return result;
 }
};
inline ADNode ADNode::getVariable(OPERATOR op, const int dim1, const int dim2) const{
 if(this->pointer->type == GRADIENT)
 return builder->createInternalVariable(GRADIENT, op, dim1, dim2);
 else
 return builder->createInternalVariable(INPUT_DERIVED, op, dim1, dim2);
}
/**
 * Concatenation
 */
inline ADNode vertCat(const ADNode& lhs, const ADNode& rhs) {
 if (lhs.builder != rhs.builder) {
 std::cerr << "Can not operate with notes from different builder instances!" << std::endl;
 throw 1;
 }
 if (lhs.pointer->dim2 != rhs.pointer->dim2) {
 std::cerr << "Can not perform vertical concatanation on nodes with different size in dimension 2." << std::endl;
 throw 2;
 }
 ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, OPERATOR::VERTICAL_CONCATENATION,
 lhs.pointer->dim1 + rhs.pointer->dim1, lhs.pointer->dim2);
 result.pointer->parents.push_back(lhs.pointer);
 result.pointer->parents.push_back(rhs.pointer);
 lhs.pointer->children.push_back(result.pointer);
 rhs.pointer->children.push_back(result.pointer);
 return result;
}
inline ADNode vertCat(const ADNode& lhs, const double rhs) {
 return vertCat(lhs, lhs.builder->createConstantVariable(Matrix::Constant(1,1, rhs)));
}
/**
 * Joining nodes
 */
ADNode joinNodes(const OPERATOR op, const ADNode& lhs, const ADNode& rhs){
 if(lhs.builder != rhs.builder){
 std::cerr << "Different builders " << std::endl;
 throw 5;
 }
 ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, op , 1, 1);
 result.pointer->parents.push_back(lhs.pointer);
 result.pointer->parents.push_back(rhs.pointer);
 lhs.pointer->children.push_back(result.pointer);
 rhs.pointer->children.push_back(result.pointer);
 return result;
}
inline ADNode dot(const ADNode& lhs, const ADNode& rhs){
 return joinNodes(OPERATOR::DOT_MULTIPLY, lhs, rhs);
}
inline ADNode tanh(const ADNode& lhs){
 return lhs.tanh();
}
inline ADNode operator*(const ADNode& lhs, const ADNode& rhs){
 std::cout << "MULT: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
 ADNode node = joinNodes(OPERATOR::MULTIPLY, lhs, rhs);
 std::cout << "MULT2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
 return node;
}
inline ADNode operator+(const ADNode& lhs, const ADNode& rhs){
 std::cout << "ADD: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
 ADNode node = joinNodes(OPERATOR::ADD, lhs, rhs);
 std::cout << "ADD2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
 return node;
}
inline ADNode operator-(const double lhs, const ADNode& rhs){
 return rhs.builder->createConstantVariable(Matrix::Constant(1,1,lhs)) + (-rhs);
}
inline int getIndex(const std::vector<std::weak_ptr<ADNodeInstance>> vector, const ADNode& target){
 for (int i = 0; i < vector.size(); ++i) {
 auto item = vector[i].lock();
 if (item->id == target.pointer->id)
 return i;
 }
 return -1;
}
std::vector<ADNode> ADGraphBuilder::gradient(const ADNode &target, const ADNode& params) {
 std::vector<ADNode> results{1, this};
 // Will contain information about the subtree that effects the target node
 bool subtree[nodes.size()];
 std::vector<std::vector<ADNode>> childrenGradients(nodes.size(), std::vector<ADNode>{});
 std::vector<ADNode> directGradients{nodes.size(), this};
 subtree[target.pointer->id] = true;
 for (int i = target.pointer->id; i >= 0; i--) {
 std::cout << "passed " << i << std::endl;
 if(subtree[i]){
 // Create a new gradient node summing up all incoming gradient messages
 if(i == target.pointer->id) {
 directGradients[i] = createConstantVariable(Matrix::Ones(target.pointer->dim1, target.pointer->dim2));
 directGradients[i].pointer->name = "Gradient[" + std::to_string(i) + "]";
 }
 else
 directGradients[i] = createInternalVariable(GRADIENT, OPERATOR::ADD,
 nodes[i].pointer->dim1, nodes[i].pointer->dim2, std::to_string(i));
 for (auto child : childrenGradients[i]) {
 directGradients[i].pointer->parents.push_back(child.pointer);
 child.pointer->children.push_back(directGradients[i].pointer);
 }
 for (auto item : nodes[i].pointer->parents) {
 auto parent = item.lock();
 // Check if parent is not constant
 if(parent->type != CONSTANT) {
 // Mark parent as node to traverse
 subtree[parent->id] = true;
 // Create the corresponding message as a new node and add it to the parents list
 // of gradient incoming messages
 ADNode capsule = ADNode(this);
 capsule.pointer = parent;
 //childrenGradients[parent->id].push_back(createConstantVariable(Matrix::Constant(1,1,1)));
 std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
 childrenGradients[parent->id].push_back(createGradientMessage(nodes[i], capsule, directGradients[i]));
 std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
 //childrenGradients[parent->id].back().pointer->name = "Gradinet[" + childrenGradients[parent->id].back().pointer->name + "]";
 }
 }
 }
 }
 return results;
};
ADNode ADGraphBuilder::createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient){
 switch (child.pointer->op){
 case(OPERATOR::NONE):
 std::cout << "Making gradient of an INPUT node? Are you serious?";
 return createInternalVariable(CONSTANT, OPERATOR::NONE, child.pointer->dim1,child.pointer->dim2,
 std::to_string(child.pointer->id) + "->" + std::to_string(parent.pointer->id));
 case(OPERATOR::ADD):
 return directGradient;
 case(OPERATOR::UNNARY_NEGATION):
 return -directGradient;
 case(OPERATOR::DOT_MULTIPLY):
 if(child.pointer->parents.size() == 2){
 int index = getIndex(child.pointer->parents, parent);
 ADNode otherParent(child.builder);
 otherParent.pointer = child.pointer->parents[1-index].lock();
 if(index == 0)
 return dot(directGradient,otherParent.transpose());
 else
 return dot(otherParent.transpose(),directGradient);
 }
 else{
 std::cerr<< "Unimplemented" << std::endl;
 return NULL;
 }
 case(OPERATOR::VERTICAL_CONCATENATION): {
 int index = getIndex(child.pointer->parents, parent);
 int start = 0;
 for (int i = 0; i < index; ++i) {
 auto parent = child.pointer->parents[i].lock();
 start += parent->dim1;
 }
 return directGradient.block(start, 0, parent.pointer->dim1, parent.pointer->dim2);
 }
 case(OPERATOR::TANH):
 return directGradient * child * (1 - child);
 }
}
#endif //ADVIBE_STACK_H

The main program:

#include <iostream>
#include "Eigen/Dense"
#include "stack.h"
int main() {
 ADGraphBuilder builder;
 ADNode param1 = builder.createVariable(2,1);
 ADNode param2 = builder.createVariable(2,3);
 Matrix sad(1,1);
 sad << 2;
 ADNode h = tanh(dot(param2, vertCat(param1,1)));
 for (int i = 0; i < 0; ++i) {
 h = tanh(dot(param2,vertCat(h,1)));
 }
 ADNode e = dot(h.transpose(), h);
 auto grad = builder.gradient(e, param2);
 std::cout << "(" << e.pointer->dim1 << ", " << e.pointer->dim2 << ")" << std::endl;
 return 0;
}

When I ran it the output I get is the following:

passed 7
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 5
Capsule: 0x7ffffff7a750 - 4
5 U 
NEG: 0x7ffffff7a750 - 5
NEG2: 0x7ffffff7a750 - 5
ADD: 0x7ffffff7a750 - 15
ADD2: 0x7ffffff7a750 - 15
MULT: 0 - 5
Different builders 
terminate called after throwing an instance of 'int'
Process finished with exit code 134

According to this this mean that in the expression in function ADGraphBuilder::createGradientMessage for TANH for some reason the expression child * (1 - child) after the right operand is evaluated the child variable is garbage collected or I don't know?

Any ideas and suggestions are welcome and still thanks to Azad and Whoz for the discussion so far.

asked May 7, 2015 at 4:15
19
  • Looks like "ADNode result = ADNode(this)" makes suspicious modifications Commented May 7, 2015 at 4:21
  • 1
    While you're at it, Include the code for the ADNode constructor takes ADNode(this). That isn't a "normal" construct. And of course, copy and move ctors and assignment ops are probably worth looking at. Commented May 7, 2015 at 4:22
  • I've correct it. That is the constructor since it is a member function of the builder class. Hope what I updated makes it more sensible. Commented May 7, 2015 at 4:30
  • That constructor taking a pointer to ADGraphBuilder worries me. What if the pointer is to an object that is destructed before the ADNode object is destructed, like a temporary object? That would leave you with a stale pointer and probable UB. Commented May 7, 2015 at 4:36
  • I agree ADNode(this) is suspicious. this is a pointer and you're casting it. Did you meant ADNode(*this) ? Commented May 7, 2015 at 4:38

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.