
I have some neural network Julia code which I'm hoping to speed up. It may simply be poorly designed, but I'm not convinced that's the whole story.

# This is line 1 (for the purpose of matching the profiler output's line numbers)
sigmoid(z::Float64) = 1/(1 + exp(-z))
sigmoidPrime(z::Float64) = sigmoid(z) * (1 - sigmoid(z))

### Types ###

abstract AbstractNode

type Edge
    source::AbstractNode
    target::AbstractNode
    weight::Float64
    derivative::Float64
    augmented::Bool

    Edge(source::AbstractNode, target::AbstractNode) = new(source, target, randn(), 0.0, false)
end

type Node <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    activationPrime::Float64

    Node() = new([], [], -1.0, -1.0)
end

type InputNode <: AbstractNode
    index::Int
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64

    InputNode(index::Int) = new(index, [], [], -1.0)
end

type BiasNode <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64

    BiasNode() = new([], [], 1.0)
end
type Network
    inputNodes::Vector{InputNode}
    hiddenNodes::Vector{Node}
    outputNodes::Vector{Node}

    function Network(layerSizes::Array, bias::Bool=true)
        inputNodes = [InputNode(i) for i in 1:layerSizes[1]]
        hiddenNodes = [Node() for _ in 1:layerSizes[2]]
        outputNodes = [Node() for _ in 1:layerSizes[3]]

        for inputNode in inputNodes
            for node in hiddenNodes
                edge = Edge(inputNode, node)
                push!(inputNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end

        for node in hiddenNodes
            for outputNode in outputNodes
                edge = Edge(node, outputNode)
                push!(node.outgoingEdges, edge)
                push!(outputNode.incomingEdges, edge)
            end
        end

        if bias == true
            biasNode = BiasNode()
            for node in hiddenNodes
                edge = Edge(biasNode, node)
                push!(biasNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end

        new(inputNodes, hiddenNodes, outputNodes)
    end
end
### Methods ###

function evaluate(obj::Node, inputVector::Array)
    if obj.activation > -0.5
        return obj.activation
    else
        weightedSum = sum([d.weight * evaluate(d.source, inputVector) for d in obj.incomingEdges])
        obj.activation = sigmoid(weightedSum)
        obj.activationPrime = sigmoidPrime(weightedSum)
        return obj.activation
    end
end

function evaluate(obj::InputNode, inputVector::Array)
    obj.activation = inputVector[obj.index]
    return obj.activation
end

function evaluate(obj::BiasNode, inputVector::Array)
    obj.activation = 1.0
    return obj.activation
end

function updateWeights(obj::AbstractNode, learningRate::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.weight -= learningRate * d.derivative
            updateWeights(d.source, learningRate)
            d.derivative = 0.0
        end
    end
end

function compute(obj::Network, inputVector::Array)
    output = [evaluate(node, inputVector) for node in obj.outputNodes]
    for node in obj.outputNodes
        clear(node)
    end
    return output
end

function clear(obj::AbstractNode)
    for d in obj.incomingEdges
        obj.activation = -1.0
        obj.activationPrime = -1.0
        if d.augmented == true
            d.augmented = false
            clear(d.source)
        end
    end
end

function propagateDerivatives(obj::AbstractNode, error::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.derivative += error * obj.activationPrime * d.source.activation
            propagateDerivatives(d.source, error * d.weight * obj.activationPrime)
        end
    end
end

function backpropagation(obj::Network, example::Array)
    output = [evaluate(node, example[1]) for node in obj.outputNodes]
    error = output - example[2]
    for (node, err) in zip(obj.outputNodes, error)
        propagateDerivatives(node, err)
    end
end

function train(obj::Network, labeledExamples::Array, sampleSize::Int, learningRate::Float64=0.7, iterations::Int=10000)
    for _ in 1:iterations
        println(_)
        batch = labeledExamples[rand(1:length(labeledExamples), sampleSize)]
        for ex in batch
            backpropagation(obj, ex)
            for node in obj.outputNodes
                clear(node)
            end
        end

        for node in obj.outputNodes
            updateWeights(node, learningRate)
        end

        for node in obj.outputNodes
            clear(node)
        end
    end
end
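
For context, a minimal driver along these lines would produce the kind of profile shown below; the 2-3-1 layer sizes, the random data, and the iteration count here are illustrative, not taken from the original run:

    # Hypothetical driver: build a small network, make up some labeled examples,
    # and collect a profile of train() with the built-in sampling profiler.
    net = Network([2, 3, 1])
    examples = Array(Any, 0)                    # will hold [input, target] pairs
    for i in 1:200
        push!(examples, Any[rand(2), [float(rand(0:1))]])
    end
    @profile train(net, examples, 10, 0.7, 1000)
    Profile.print()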

Here is the relevant Profile.print() backtrace (total "time" = 10993):

 7643 ...ia/neuralnetwork.jl; backpropagation; line: 154
 2 ...ia/neuralnetwork.jl; propagateDerivatives; line: 141
 9 ...ia/neuralnetwork.jl; propagateDerivatives; line: 142
 103 ...ia/neuralnetwork.jl; propagateDerivatives; line: 144
 7 .../lib/julia/sys.dylib; *; (unknown line)
 3 .../lib/julia/sys.dylib; +; (unknown line)
 1 .../lib/julia/sys.dylib; +; (unknown line)
 7517 ...ia/neuralnetwork.jl; propagateDerivatives; line: 145
 241 ...a/neuralnetwork.jl; propagateDerivatives; line: 141
 1118 ...a/neuralnetwork.jl; propagateDerivatives; line: 142
 1 ...a/neuralnetwork.jl; propagateDerivatives; line: 143
 4784 ...a/neuralnetwork.jl; propagateDerivatives; line: 144
 375 ...lib/julia/sys.dylib; *; (unknown line)
 13 ...lib/julia/sys.dylib; *; (unknown line)
 550 ...lib/julia/sys.dylib; +; (unknown line)
 15 ...lib/julia/sys.dylib; +; (unknown line)
 354 ...lib/julia/sys.dylib; convert; (unknown line)
 1352 ...a/neuralnetwork.jl; propagateDerivatives; line: 145
 2 ...ib/julia/sys.dylib; typeinf_ext; (unknown line)
 2 ...lib/julia/sys.dylib; typeinf; (unknown line)
 2 ...lib/julia/sys.dylib; typeinf; (unknown line)
 2 ...lib/julia/sys.dylib; inlining_pass; (unknown line)
 2 ...ib/julia/sys.dylib; inlining_pass; (unknown line)
 2 ...ib/julia/sys.dylib; inlining_pass; (unknown line)
 2 ...b/julia/sys.dylib; inlineable; (unknown line)
 1 ...b/julia/sys.dylib; cell_1d; (unknown line)
 277 ...a/neuralnetwork.jl; propagateDerivatives; line: 141
 18 ...a/neuralnetwork.jl; propagateDerivatives; line: 145
 1725 ...ia/neuralnetwork.jl; train; line: 166
 3 ...ia/neuralnetwork.jl; clear; line: 130
 7 ...ia/neuralnetwork.jl; clear; line: 133
 1715 ...ia/neuralnetwork.jl; clear; line: 135
 110 ...ia/neuralnetwork.jl; clear; line: 130
 18 ...ia/neuralnetwork.jl; clear; line: 131
 7 ...ia/neuralnetwork.jl; clear; line: 132
 940 ...ia/neuralnetwork.jl; clear; line: 133
 625 ...ia/neuralnetwork.jl; clear; line: 135

There isn't a way I know of to highlight particular lines of code, but in the profiler backtrace the three suspicious lines are 142, 144, and 133. Together they account for over half the time spent in the code, and yet all three seem rather innocuous.

Line 142 corresponds to the simple if statement if d.augmented == false in the propagateDerivatives function.

Line 144 corresponds to the line d.derivative += error * obj.activationPrime * d.source.activation, also in the propagateDerivatives function. (The code spends a whopping 4784 times as long on this line as on the line right before it, even though they are executed the same number of times; that seems pretty fishy to me.)

Line 133 corresponds to the if statement if d.augmented == true in the clear function.

In the end, even if I'm able to double its speed, it still won't be nearly as fast as other neural network implementations with a less elaborate class structure running under PyPy. This algorithm, while elegant, probably just keeps too much in memory to be terribly efficient; that's my guess, anyway.

Here is the flat profiler backtrace:

 Count File Function Line
 403 ...ces/julia/lib/julia/sys.dylib * -1
 574 ...ces/julia/lib/julia/sys.dylib + -1
 1 ...ces/julia/lib/julia/sys.dylib _basemod -1
 2 ...ces/julia/lib/julia/sys.dylib _iisconst -1
 64 ...ces/julia/lib/julia/sys.dylib _methods -1
 30 ...ces/julia/lib/julia/sys.dylib abstract_call -1
 32 ...ces/julia/lib/julia/sys.dylib abstract_call_gf -1
 48 ...ces/julia/lib/julia/sys.dylib abstract_eval -1
 20 ...ces/julia/lib/julia/sys.dylib abstract_eval_arg -1
 46 ...ces/julia/lib/julia/sys.dylib abstract_eval_call -1
 27 ...ces/julia/lib/julia/sys.dylib abstract_interpret -1
 1 ...ces/julia/lib/julia/sys.dylib cell_1d -1
 2 ...ces/julia/lib/julia/sys.dylib contains_is1 -1
 360 ...ces/julia/lib/julia/sys.dylib convert -1
 1 ...ces/julia/lib/julia/sys.dylib copy! -1
 6 ...ces/julia/lib/julia/sys.dylib disassociate_julia_struct -1
 18 ...ces/julia/lib/julia/sys.dylib effect_free -1
 2 ...ces/julia/lib/julia/sys.dylib eval_annotate -1
 1 ...ces/julia/lib/julia/sys.dylib getindex -1
 10932 ...ces/julia/lib/julia/sys.dylib include -1
 10932 ...ces/julia/lib/julia/sys.dylib include_from_node1 -1
 2 ...ces/julia/lib/julia/sys.dylib inline_worthy -1
 18 ...ces/julia/lib/julia/sys.dylib inlineable -1
 62 ...ces/julia/lib/julia/sys.dylib inlining_pass -1
 8 ...ces/julia/lib/julia/sys.dylib is_known_call -1
 1 ...ces/julia/lib/julia/sys.dylib is_known_call_p -1
 2 ...ces/julia/lib/julia/sys.dylib isconst -1
 5 ...ces/julia/lib/julia/sys.dylib isconstantfunc -1
 1 ...ces/julia/lib/julia/sys.dylib length -1
 7 ...ces/julia/lib/julia/sys.dylib occurs_more -1
 24 ...ces/julia/lib/julia/sys.dylib occurs_outside_tupleref -1
 4 ...ces/julia/lib/julia/sys.dylib stchanged -1
 2 ...ces/julia/lib/julia/sys.dylib stupdate -1
 3 ...ces/julia/lib/julia/sys.dylib tuple_elim_pass -1
 18 ...ces/julia/lib/julia/sys.dylib tupleref_elim_pass -1
 1 ...ces/julia/lib/julia/sys.dylib type_annotate -1
 146 ...ces/julia/lib/julia/sys.dylib typeinf -1
 47 ...ces/julia/lib/julia/sys.dylib typeinf_ext -1
 1 ...ces/julia/lib/julia/sys.dylib unique_name -1
 1 ...ces/julia/lib/julia/sys.dylib unsafe_copy! -1
 1344 ...esktop/julia/neuralnetwork.jl backpropagation 151
 8 ...esktop/julia/neuralnetwork.jl backpropagation 152
 9 ...esktop/julia/neuralnetwork.jl backpropagation 153
 7643 ...esktop/julia/neuralnetwork.jl backpropagation 154
 231 ...esktop/julia/neuralnetwork.jl clear 130
 20 ...esktop/julia/neuralnetwork.jl clear 131
 8 ...esktop/julia/neuralnetwork.jl clear 132
 974 ...esktop/julia/neuralnetwork.jl clear 133
 2432 ...esktop/julia/neuralnetwork.jl clear 135
 1 ...esktop/julia/neuralnetwork.jl evaluate 90
 2604 ...esktop/julia/neuralnetwork.jl evaluate 92
 2 ...esktop/julia/neuralnetwork.jl evaluate 93
 34 ...esktop/julia/neuralnetwork.jl evaluate 101
 1 ...esktop/julia/neuralnetwork.jl evaluate 102
 1 ...esktop/julia/neuralnetwork.jl evaluate 106
 532 ...esktop/julia/neuralnetwork.jl propagateDerivatives 141
 1127 ...esktop/julia/neuralnetwork.jl propagateDerivatives 142
 1 ...esktop/julia/neuralnetwork.jl propagateDerivatives 143
 4887 ...esktop/julia/neuralnetwork.jl propagateDerivatives 144
 8887 ...esktop/julia/neuralnetwork.jl propagateDerivatives 145
 10 ...esktop/julia/neuralnetwork.jl train 160
 2 ...esktop/julia/neuralnetwork.jl train 161
 9031 ...esktop/julia/neuralnetwork.jl train 163
 1725 ...esktop/julia/neuralnetwork.jl train 166
 77 ...esktop/julia/neuralnetwork.jl train 171
 53 ...esktop/julia/neuralnetwork.jl train 175
 8 ...esktop/julia/neuralnetwork.jl updateWeights 111
 31 ...esktop/julia/neuralnetwork.jl updateWeights 112
 2 ...esktop/julia/neuralnetwork.jl updateWeights 114
 117 ...esktop/julia/neuralnetwork.jl updateWeights 115
 1 ...esktop/julia/neuralnetwork.jl updateWeights 116
 10932 REPL.jl eval_user_input 54
 1 REPL.jl eval_user_input 55
 1 array.jl - 719
 1 array.jl getindex 271
 10932 profile.jl anonymous 14
 1 random.jl RandIntGen 179
 7 reduce.jl _mapreduce 168
 7 reduce.jl mapreduce_pairwise_impl 125
 1 reduce.jl mapreduce_seq_impl 210
 2 reduce.jl mapreduce_seq_impl 211
 1 reduce.jl mapreduce_seq_impl 212
 1 reduce.jl mapreduce_seq_impl 214
 2 reduce.jl mapreduce_seq_impl 217
 2 string.jl print 4
 8 string.jl println 5
 10933 task.jl anonymous 96
asked Aug 15, 2014 at 2:50
  • Noob question: why don't you use some kind of matrix calculation instead of introducing new classes? Commented Aug 15, 2014 at 23:11
  • @Imorin, because it's much more readable and elegant in this form, but you're right, the efficient way is to vectorize it (or, with Julia, to use nested loops); a rough sketch of that vectorized form follows these comments. My hope was that Julia would be fast enough that I wouldn't have to write it in a different form, but that doesn't seem to be the case. Commented Aug 15, 2014 at 23:41
  • It is neither efficient nor readable to implement neural nets without matrix operations. Commented Aug 15, 2014 at 23:58
  • It should be efficient in Julia using nested loops, but apparently not using a network class structure. Commented Aug 16, 2014 at 0:07
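
For reference, here is a rough sketch of the vectorized, matrix-based formulation the comments above allude to. It is only an illustration: the 2-3-1 sizes and the names sigmoidVec, forward, W1, b1, W2 and b2 are made up, and it uses the elementwise vectorized style that Julia 0.3 supported:

    # Forward pass for a 2-3-1 network expressed with matrix operations instead of
    # Node/Edge objects; weights and biases live in plain arrays.
    sigmoidVec(z) = 1 ./ (1 + exp(-z))      # elementwise on vectors in Julia 0.3

    function forward(W1, b1, W2, b2, x)
        hidden = sigmoidVec(W1 * x + b1)    # input layer -> hidden layer
        return sigmoidVec(W2 * hidden + b2) # hidden layer -> output layer
    end

    W1 = randn(3, 2); b1 = randn(3)         # 2 inputs -> 3 hidden units
    W2 = randn(1, 3); b2 = randn(1)         # 3 hidden units -> 1 output
    forward(W1, b1, W2, b2, rand(2))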

2 Answers


Alright, so I've done a lot of experimenting, and the problem seems to be accessing data internal to one type via another type. Specifically, accessing d.source.activation appears to be about 100 times more costly than just accessing d.augmented. If you replace d.source.activation with 1.0, you'll more than double the speed.

It's also possible this is because the type information the compiler has for source is the abstract type AbstractNode rather than a concrete type, but I know of no way around that.
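
To illustrate that suspicion, here is a small self-contained timing sketch, written in the same Julia-0.3-era syntax as the post; the types AbstractThing, Thing, AbstractlyTypedEdge and ConcretelyTypedEdge are made up purely for the comparison:

    # Compare reading a Float64 through a field declared with an abstract type
    # (as Edge.source is) against reading it through a concretely-typed field.
    abstract AbstractThing

    type Thing <: AbstractThing
        activation::Float64
    end

    type AbstractlyTypedEdge       # mirrors Edge: source declared as the abstract type
        source::AbstractThing
    end

    type ConcretelyTypedEdge       # same layout, but source declared concretely
        source::Thing
    end

    function total(edges)
        s = 0.0
        for e in edges
            s += e.source.activation   # the access pattern from line 144
        end
        return s
    end

    absEdges = [AbstractlyTypedEdge(Thing(1.0)) for i in 1:10^6]
    conEdges = [ConcretelyTypedEdge(Thing(1.0)) for i in 1:10^6]

    @time total(absEdges)   # expected to be far slower: the field's type is not concrete
    @time total(conEdges)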

Either way this is as far as I can tell a serious performance issue which I don't immediately see how to circumvent.

However, on closer consideration the other two suspicious lines actually seem to be about right, so it's just line 144 that was causing the slowdown.

Moreover, by getting rid of the syntactic sugar += on line 144, one can shave about 10 percent off the time, so there is an issue with that as well.
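
For clarity, "getting rid of the sugar" just means writing the update on line 144 out explicitly; the semantics are unchanged:

    d.derivative = d.derivative + error * obj.activationPrime * d.source.activation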

answered Aug 15, 2014 at 8:09

This change is predicated on the assumption that your program is currently giving you the correct output. If a BiasNode or InputNode is passed into either clear(...) or propagateDerivatives(...), these functions will crash, because they each access activationPrime, a field unique to the Node type. If we change the parameter type of obj in each of these functions to Node, we should see some speed-up in the execution of your program, because we are no longer passing a parameter whose declared type (AbstractNode) has arbitrary size and arbitrary fields; instead we are passing a parameter whose type (Node) has an explicit size and explicit fields. Also, in clear(...) I moved the access and modification of obj's fields outside of the for loop, as they only need to be set once.

    function clear(obj::Node)
        obj.activation = -1.0
        obj.activationPrime = -1.0
        for d in obj.incomingEdges
            if d.augmented == true       # same loop body as in the question
                d.augmented = false
                clear(d.source)
            end
        end
    end

    function propagateDerivatives(obj::Node, error::Float64)
        # body unchanged from the question
    end
answered Aug 15, 2014 at 5:03
  • But then these functions could not accept InputNode or BiasNode. Commented Aug 15, 2014 at 5:26
  • Neither InputNode nor BiasNode has any incoming edges, so the functions never attempt to access obj.activationPrime in their case. Commented Aug 15, 2014 at 6:18
