I have some neural network code in Julia which I'm hoping to speed up. It may simply be poorly designed, but I'm not convinced.
# This is line 1 (for the purpose of matching the profiler output's line numbers)
sigmoid(z::Float64) = 1/(1 + exp(-z))
sigmoidPrime(z::Float64) = sigmoid(z) * (1 - sigmoid(z))
### Types ###
abstract AbstractNode
type Edge
    source::AbstractNode
    target::AbstractNode
    weight::Float64
    derivative::Float64
    augmented::Bool
    Edge(source::AbstractNode, target::AbstractNode) = new(source, target, randn(), 0.0, false)
end

type Node <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    activationPrime::Float64
    Node() = new([], [], -1.0, -1.0)
end

type InputNode <: AbstractNode
    index::Int
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    InputNode(index::Int) = new(index, [], [], -1.0)
end

type BiasNode <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    BiasNode() = new([], [], 1.0)
end
type Network
    inputNodes::Vector{InputNode}
    hiddenNodes::Vector{Node}
    outputNodes::Vector{Node}
    function Network(layerSizes::Array, bias::Bool=true)
        inputNodes = [InputNode(i) for i in 1:layerSizes[1]];
        hiddenNodes = [Node() for _ in 1:layerSizes[2]];
        outputNodes = [Node() for _ in 1:layerSizes[3]];
        for inputNode in inputNodes
            for node in hiddenNodes
                edge = Edge(inputNode, node);
                push!(inputNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end
        for node in hiddenNodes
            for outputNode in outputNodes
                edge = Edge(node, outputNode);
                push!(node.outgoingEdges, edge)
                push!(outputNode.incomingEdges, edge)
            end
        end
        if bias == true
            biasNode = BiasNode()
            for node in hiddenNodes
                edge = Edge(biasNode, node);
                push!(biasNode.outgoingEdges, edge)
                push!(node.incomingEdges, edge)
            end
        end
        new(inputNodes, hiddenNodes, outputNodes)
    end
end
### Methods ###
function evaluate(obj::Node, inputVector::Array)
    if obj.activation > -0.5
        return obj.activation
    else
        weightedSum = sum([d.weight * evaluate(d.source, inputVector) for d in obj.incomingEdges])
        obj.activation = sigmoid(weightedSum)
        obj.activationPrime = sigmoidPrime(weightedSum)
        return obj.activation
    end
end

function evaluate(obj::InputNode, inputVector::Array)
    obj.activation = inputVector[obj.index]
    return obj.activation
end

function evaluate(obj::BiasNode, inputVector::Array)
    obj.activation = 1.0
    return obj.activation
end

function updateWeights(obj::AbstractNode, learningRate::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.weight -= learningRate * d.derivative
            updateWeights(d.source, learningRate)
            d.derivative = 0.0
        end
    end
end
function compute(obj::Network, inputVector::Array)
    output = [evaluate(node, inputVector) for node in obj.outputNodes]
    for node in obj.outputNodes
        clear(node)
    end
    return output
end

function clear(obj::AbstractNode)
    for d in obj.incomingEdges
        obj.activation = -1.0
        obj.activationPrime = -1.0
        if d.augmented == true
            d.augmented = false
            clear(d.source)
        end
    end
end

function propagateDerivatives(obj::AbstractNode, error::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.derivative += error * obj.activationPrime * d.source.activation
            propagateDerivatives(d.source, error * d.weight * obj.activationPrime)
        end
    end
end
function backpropagation(obj::Network, example::Array)
    output = [evaluate(node, example[1]) for node in obj.outputNodes]
    error = output - example[2]
    for (node, err) in zip(obj.outputNodes, error)
        propagateDerivatives(node, err)
    end
end

function train(obj::Network, labeledExamples::Array, sampleSize::Int, learningRate::Float64=0.7, iterations::Int=10000)
    for _ in 1:iterations
        println(_)
        batch = labeledExamples[rand(1:length(labeledExamples), sampleSize)]
        for ex in batch
            backpropagation(obj, ex)
            for node in obj.outputNodes
                clear(node)
            end
        end
        for node in obj.outputNodes
            updateWeights(node, learningRate)
        end
        for node in obj.outputNodes
            clear(node)
        end
    end
end
Here is the relevant Profile.print() output.
Backtrace (total sample count = 10993):
7643 ...ia/neuralnetwork.jl; backpropagation; line: 154
2 ...ia/neuralnetwork.jl; propagateDerivatives; line: 141
9 ...ia/neuralnetwork.jl; propagateDerivatives; line: 142
103 ...ia/neuralnetwork.jl; propagateDerivatives; line: 144
7 .../lib/julia/sys.dylib; *; (unknown line)
3 .../lib/julia/sys.dylib; +; (unknown line)
1 .../lib/julia/sys.dylib; +; (unknown line)
7517 ...ia/neuralnetwork.jl; propagateDerivatives; line: 145
241 ...a/neuralnetwork.jl; propagateDerivatives; line: 141
1118 ...a/neuralnetwork.jl; propagateDerivatives; line: 142
1 ...a/neuralnetwork.jl; propagateDerivatives; line: 143
4784 ...a/neuralnetwork.jl; propagateDerivatives; line: 144
375 ...lib/julia/sys.dylib; *; (unknown line)
13 ...lib/julia/sys.dylib; *; (unknown line)
550 ...lib/julia/sys.dylib; +; (unknown line)
15 ...lib/julia/sys.dylib; +; (unknown line)
354 ...lib/julia/sys.dylib; convert; (unknown line)
1352 ...a/neuralnetwork.jl; propagateDerivatives; line: 145
2 ...ib/julia/sys.dylib; typeinf_ext; (unknown line)
2 ...lib/julia/sys.dylib; typeinf; (unknown line)
2 ...lib/julia/sys.dylib; typeinf; (unknown line)
2 ...lib/julia/sys.dylib; inlining_pass; (unknown line)
2 ...ib/julia/sys.dylib; inlining_pass; (unknown line)
2 ...ib/julia/sys.dylib; inlining_pass; (unknown line)
2 ...b/julia/sys.dylib; inlineable; (unknown line)
1 ...b/julia/sys.dylib; cell_1d; (unknown line)
277 ...a/neuralnetwork.jl; propagateDerivatives; line: 141
18 ...a/neuralnetwork.jl; propagateDerivatives; line: 145
1725 ...ia/neuralnetwork.jl; train; line: 166
3 ...ia/neuralnetwork.jl; clear; line: 130
7 ...ia/neuralnetwork.jl; clear; line: 133
1715 ...ia/neuralnetwork.jl; clear; line: 135
110 ...ia/neuralnetwork.jl; clear; line: 130
18 ...ia/neuralnetwork.jl; clear; line: 131
7 ...ia/neuralnetwork.jl; clear; line: 132
940 ...ia/neuralnetwork.jl; clear; line: 133
625 ...ia/neuralnetwork.jl; clear; line: 135
There isn't a way I know of to highlight particular lines of code, but in the profiler backtrace the three suspicious lines are 142, 144, and 133. Together they account for over half the time spent in the code, and yet all three seem rather innocuous.
Line 142 corresponds to the simple if statement if d.augmented == false in the propagateDerivatives function.
Line 144 corresponds to the line d.derivative += error * obj.activationPrime * d.source.activation, also in the propagateDerivatives function. (The code spends a whopping 4784 times as long on this line as it does on the line right before it, even though they are executed the same number of times, which seems pretty fishy to me.)
Line 133 corresponds to the if statement if d.augmented == true in the clear function.
In the end, even if I'm able to double its speed, it still wouldn't be nearly as fast as neural network implementations with a less elaborate class structure written in PyPy. This algorithm, while elegant, probably simply stores too much in memory to be terribly efficient; that's my guess, anyway.
Here is the flat profiler backtrace:
Count  File  Function  Line
403  ...ces/julia/lib/julia/sys.dylib  *  -1
574  ...ces/julia/lib/julia/sys.dylib  +  -1
1  ...ces/julia/lib/julia/sys.dylib  _basemod  -1
2  ...ces/julia/lib/julia/sys.dylib  _iisconst  -1
64  ...ces/julia/lib/julia/sys.dylib  _methods  -1
30  ...ces/julia/lib/julia/sys.dylib  abstract_call  -1
32  ...ces/julia/lib/julia/sys.dylib  abstract_call_gf  -1
48  ...ces/julia/lib/julia/sys.dylib  abstract_eval  -1
20  ...ces/julia/lib/julia/sys.dylib  abstract_eval_arg  -1
46  ...ces/julia/lib/julia/sys.dylib  abstract_eval_call  -1
27  ...ces/julia/lib/julia/sys.dylib  abstract_interpret  -1
1  ...ces/julia/lib/julia/sys.dylib  cell_1d  -1
2  ...ces/julia/lib/julia/sys.dylib  contains_is1  -1
360  ...ces/julia/lib/julia/sys.dylib  convert  -1
1  ...ces/julia/lib/julia/sys.dylib  copy!  -1
6  ...ces/julia/lib/julia/sys.dylib  disassociate_julia_struct  -1
18  ...ces/julia/lib/julia/sys.dylib  effect_free  -1
2  ...ces/julia/lib/julia/sys.dylib  eval_annotate  -1
1  ...ces/julia/lib/julia/sys.dylib  getindex  -1
10932  ...ces/julia/lib/julia/sys.dylib  include  -1
10932  ...ces/julia/lib/julia/sys.dylib  include_from_node1  -1
2  ...ces/julia/lib/julia/sys.dylib  inline_worthy  -1
18  ...ces/julia/lib/julia/sys.dylib  inlineable  -1
62  ...ces/julia/lib/julia/sys.dylib  inlining_pass  -1
8  ...ces/julia/lib/julia/sys.dylib  is_known_call  -1
1  ...ces/julia/lib/julia/sys.dylib  is_known_call_p  -1
2  ...ces/julia/lib/julia/sys.dylib  isconst  -1
5  ...ces/julia/lib/julia/sys.dylib  isconstantfunc  -1
1  ...ces/julia/lib/julia/sys.dylib  length  -1
7  ...ces/julia/lib/julia/sys.dylib  occurs_more  -1
24  ...ces/julia/lib/julia/sys.dylib  occurs_outside_tupleref  -1
4  ...ces/julia/lib/julia/sys.dylib  stchanged  -1
2  ...ces/julia/lib/julia/sys.dylib  stupdate  -1
3  ...ces/julia/lib/julia/sys.dylib  tuple_elim_pass  -1
18  ...ces/julia/lib/julia/sys.dylib  tupleref_elim_pass  -1
1  ...ces/julia/lib/julia/sys.dylib  type_annotate  -1
146  ...ces/julia/lib/julia/sys.dylib  typeinf  -1
47  ...ces/julia/lib/julia/sys.dylib  typeinf_ext  -1
1  ...ces/julia/lib/julia/sys.dylib  unique_name  -1
1  ...ces/julia/lib/julia/sys.dylib  unsafe_copy!  -1
1344  ...esktop/julia/neuralnetwork.jl  backpropagation  151
8  ...esktop/julia/neuralnetwork.jl  backpropagation  152
9  ...esktop/julia/neuralnetwork.jl  backpropagation  153
7643  ...esktop/julia/neuralnetwork.jl  backpropagation  154
231  ...esktop/julia/neuralnetwork.jl  clear  130
20  ...esktop/julia/neuralnetwork.jl  clear  131
8  ...esktop/julia/neuralnetwork.jl  clear  132
974  ...esktop/julia/neuralnetwork.jl  clear  133
2432  ...esktop/julia/neuralnetwork.jl  clear  135
1  ...esktop/julia/neuralnetwork.jl  evaluate  90
2604  ...esktop/julia/neuralnetwork.jl  evaluate  92
2  ...esktop/julia/neuralnetwork.jl  evaluate  93
34  ...esktop/julia/neuralnetwork.jl  evaluate  101
1  ...esktop/julia/neuralnetwork.jl  evaluate  102
1  ...esktop/julia/neuralnetwork.jl  evaluate  106
532  ...esktop/julia/neuralnetwork.jl  propagateDerivatives  141
1127  ...esktop/julia/neuralnetwork.jl  propagateDerivatives  142
1  ...esktop/julia/neuralnetwork.jl  propagateDerivatives  143
4887  ...esktop/julia/neuralnetwork.jl  propagateDerivatives  144
8887  ...esktop/julia/neuralnetwork.jl  propagateDerivatives  145
10  ...esktop/julia/neuralnetwork.jl  train  160
2  ...esktop/julia/neuralnetwork.jl  train  161
9031  ...esktop/julia/neuralnetwork.jl  train  163
1725  ...esktop/julia/neuralnetwork.jl  train  166
77  ...esktop/julia/neuralnetwork.jl  train  171
53  ...esktop/julia/neuralnetwork.jl  train  175
8  ...esktop/julia/neuralnetwork.jl  updateWeights  111
31  ...esktop/julia/neuralnetwork.jl  updateWeights  112
2  ...esktop/julia/neuralnetwork.jl  updateWeights  114
117  ...esktop/julia/neuralnetwork.jl  updateWeights  115
1  ...esktop/julia/neuralnetwork.jl  updateWeights  116
10932  REPL.jl  eval_user_input  54
1  REPL.jl  eval_user_input  55
1  array.jl  -  719
1  array.jl  getindex  271
10932  profile.jl  anonymous  14
1  random.jl  RandIntGen  179
7  reduce.jl  _mapreduce  168
7  reduce.jl  mapreduce_pairwise_impl  125
1  reduce.jl  mapreduce_seq_impl  210
2  reduce.jl  mapreduce_seq_impl  211
1  reduce.jl  mapreduce_seq_impl  212
1  reduce.jl  mapreduce_seq_impl  214
2  reduce.jl  mapreduce_seq_impl  217
2  string.jl  print  4
8  string.jl  println  5
10933  task.jl  anonymous  96
- Noob question: why don't you use some kind of matrix calculation instead of introducing a new class? – Lucas Morin, Aug 15, 2014 at 23:11
- @Imorin, because it's much more readable and elegant in this form, but you're right, the efficient way is to vectorize it (or, with Julia, use nested loops). My hope was that Julia would be fast enough that I wouldn't have to write it in a different form, but that doesn't seem to be the case. – Set, Aug 15, 2014 at 23:41
- It is neither efficient nor readable to implement neural nets without matrix operations. – alfa, Aug 15, 2014 at 23:58
- It should be efficient in Julia using nested loops, but apparently not using a network class structure. – Set, Aug 16, 2014 at 0:07
2 Answers
Alright, so I've done a lot of experimenting, and the problem seems to be accessing data internal to one type via another type. Specifically, accessing d.source.activation appears to be about 100 times more costly than just accessing d.augmented. If you replace d.source.activation with 1.0, you more than double the speed.
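To see the effect in isolation, here is a rough sketch of a micro-benchmark (toy types invented for illustration, not part of the network code, written in the same pre-0.4 syntax as the post; the exact numbers will vary):

# Sketch: compare field access through an abstractly typed field with access
# through a concretely typed field. Illustrative only.
abstract AbstractThing
type Thing <: AbstractThing
    x::Float64
end
type AbstractHolder
    inner::AbstractThing      # abstract field type: access is not type-stable
end
type ConcreteHolder
    inner::Thing              # concrete field type: direct, fast access
end

function sumx(holder, n)
    s = 0.0
    for i in 1:n
        s += holder.inner.x
    end
    return s
end

a = AbstractHolder(Thing(1.0))
c = ConcreteHolder(Thing(1.0))
sumx(a, 1); sumx(c, 1)        # compile both methods first
@time sumx(a, 10^7)
@time sumx(c, 10^7)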
It's also possible this is because the type information provided to the compiler for source is an abstract type, AbstractNode, rather than a concrete type, but I know of no way around this.
Either way, as far as I can tell, this is a serious performance issue which I don't immediately see how to circumvent.
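One possible workaround, sketched below and untested against this code, is to parameterize Edge on its endpoint types so that d.source has a concrete type at the access site; note that for the full benefit the incomingEdges/outgoingEdges vectors would also need concrete element types, which complicates the design:

# Sketch (untested): concrete endpoint types via type parameters, so that
# d.source.activation becomes a concrete field access.
type Edge{S<:AbstractNode, T<:AbstractNode}
    source::S
    target::T
    weight::Float64
    derivative::Float64
    augmented::Bool
end
Edge{S<:AbstractNode, T<:AbstractNode}(source::S, target::T) =
    Edge{S,T}(source, target, randn(), 0.0, false)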
However, on closer consideration the other two suspicious lines actually seem about right, so it was really just line 144 causing the slowdown.
Moreover, by getting rid of the syntactic sugar += on line 144, one can shave about 10 percent off the time, so there is an issue with that as well.
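For comparison, here is a rough sketch of the matrix formulation suggested in the comments (single hidden layer, sigmoid activations, the bias folded into an extra column of each weight matrix; untested, and not a drop-in replacement for the Network type above):

# Sketch: forward pass plus one gradient step with plain matrices.
# W1 is hidden x (inputs + 1), W2 is outputs x (hidden + 1); the extra
# column in each holds the bias weights.
sigm(z) = 1 ./ (1 + exp(-z))

function gradientStep(W1, W2, x, t, learningRate)
    a1 = [x; 1.0]                          # input plus bias unit
    a2 = [sigm(W1 * a1); 1.0]              # hidden activations plus bias unit
    a3 = sigm(W2 * a2)                     # output activations

    delta3 = (a3 - t) .* a3 .* (1 - a3)    # output error times sigmoid'
    h = a2[1:end-1]
    delta2 = (W2[:, 1:end-1]' * delta3) .* h .* (1 - h)

    W1new = W1 - learningRate * (delta2 * a1')
    W2new = W2 - learningRate * (delta3 * a2')
    return W1new, W2new
end

# Example: 4 inputs, 5 hidden units, 3 outputs
W1 = randn(5, 4 + 1); W2 = randn(3, 5 + 1)
W1, W2 = gradientStep(W1, W2, randn(4), rand(3), 0.7)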
This change is predicated on the assumption that your program is currently giving you the correct output. If a BiasNode or InputNode were passed into either clear(...) or propagateDerivatives(...), these functions would crash, because they each access activationPrime, a field which is unique to the Node type. If we change the parameter type of obj in each of these functions to Node, we should see some speed-up in the execution of your program. The reason is that we are no longer passing a parameter whose declared type has arbitrary size and arbitrary fields (AbstractNode); instead we are passing a parameter whose type has explicit size and explicit fields (Node). Also, in clear(...) I moved the access and modification of obj's fields outside of the for loop, since they only need to be modified once.
function clear(obj::Node)
    obj.activation = -1.0
    obj.activationPrime = -1.0
    for d in obj.incomingEdges
        if d.augmented == true
            d.augmented = false
            clear(d.source)
        end
    end
end
function propagateDerivatives(obj::Node, error::Float64)
    for d in obj.incomingEdges
        if d.augmented == false
            d.augmented = true
            d.derivative += error * obj.activationPrime * d.source.activation
            propagateDerivatives(d.source, error * d.weight * obj.activationPrime)
        end
    end
end
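If the recursion still reaches InputNode or BiasNode sources (for example via clear(d.source)), they have no incoming edges and nothing to reset, so cheap no-op fallback methods are enough to keep dispatch working; a sketch:

# Sketch: no-op fallbacks so recursive calls on InputNode/BiasNode sources
# don't raise a MethodError, while the hot path stays specialized on Node.
clear(obj::InputNode) = nothing
clear(obj::BiasNode) = nothing
propagateDerivatives(obj::InputNode, error::Float64) = nothing
propagateDerivatives(obj::BiasNode, error::Float64) = nothing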
- But then these functions could not accept InputNode or BiasNode. – Set, Aug 15, 2014 at 5:26
- Neither InputNode nor BiasNode has any incoming edges, thus the functions never attempt to access obj.activationPrime in their case. – Set, Aug 15, 2014 at 6:18