Deep Neural Network in Python

I have written a neural network in Python with a focus on adaptability and performance. I want to use it to dive deeper into that field. I am far from being an expert in neural networks, and the same goes for Python. I do not want to use TensorFlow, because I really want to understand how a neural network works.

My questions are:

  • How can I increase the performance? At the moment it takes days to train the network.

The code runs on a single core. But since the iterations of the loop over a batch run independently of each other, that loop can be parallelized.

  • How can I parallelize the loop over the batches?

I found some tutorials on parallel loops in Python, but I could not apply them to my problem.
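A minimal, self-contained sketch of the general parallel-map pattern meant here (not taken from the network code below; per_sample_work is only a hypothetical placeholder standing in for the feedforward and backpropagation of one image):

from multiprocessing import Pool
from numpy import full

def per_sample_work(i):
    # Placeholder: would compute one sample's gradient contribution (dW_i, db_i)
    return full((2, 2), float(i))

if __name__ == "__main__":
    batch = range(8)
    # Each sample is independent, so the map can be spread over worker processes
    with Pool(processes=4) as pool:
        contributions = pool.map(per_sample_work, batch)
    # Accumulate the independent contributions, like dW/db are accumulated in SGD
    total = sum(contributions)
    print(total / len(batch))

Whether this actually pays off for the network below is a separate question, since every worker also needs access to the current weights.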

Here is my tested code with some pseudo training data:

from numpy import random, zeros, array, dot
from scipy.special import expit
import time


def sigma(x):
    return expit(x)


def sigma_prime(x):
    u = expit(x)
    return u - u*u


def SGD(I, L, batch_size, eta):

    images = len(L)

    # Pre-activation
    z = [zeros((layer_size[l], 1)) for l in range(1, nn_size)]

    # Activations
    a = [zeros((layer_size[l], 1)) for l in range(nn_size)]

    # Ground truth
    y = zeros((images, layer_size[-1]))
    for i in range(images):
        y[i, L[i]] = 1.0

    while True:

        t0 = time.time()
        # Create random batch
        batch = random.randint(0, images, batch_size)

        dW = [zeros((layer_size[l+1], layer_size[l])) for l in range(nn_size-1)]
        db = [zeros((layer_size[l], 1)) for l in range(1, nn_size)]

        for i in batch:
            # Feedforward
            a[0] = array([I[i]]).T
            for l in range(nn_size-1):
                z[l] = dot(W[l], a[l]) + b[l]
                a[l+1] = sigma(z[l])

            # Backpropagation
            delta = (a[nn_size-1] - array([y[i]]).T) * sigma_prime(z[nn_size-2])
            dW[nn_size-2] += dot(delta, a[nn_size-2].T)
            db[nn_size-2] += delta
            for l in reversed(range(nn_size-2)):
                delta = dot(W[l+1].T, delta) * sigma_prime(z[l])
                dW[l] += dot(delta, a[l].T)
                db[l] += delta

        # Update weights and biases
        for l in range(nn_size-1):
            W[l] -= eta * dW[l] / batch_size
            b[l] -= eta * db[l] / batch_size
        print(time.time() - t0)


input_size = 1000
output_size = 10
layer_size = [input_size, 30**2, 30**2, 30**2, output_size]
nn_size = len(layer_size)
# Weights
W = [random.randn(layer_size[l+1], layer_size[l]) for l in range(nn_size-1)]
# Bias
b = [random.randn(layer_size[l], 1) for l in range(1, nn_size)]
# Some random training data with labels
size_training_data = 1000
# Random data I of size "size_training_data" x "input_size"
I = random.rand(size_training_data, input_size)
# Label for all training data
L = random.randint(0, 10, size_training_data)
batch_size = 100
eta = 0.1
SGD(I, L, batch_size, eta)
The output shows the time needed for one batch of size batch_size.
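One common way to address the performance question is to vectorize the batch loop instead of (or before) parallelizing it: the per-image loop is replaced by matrix-matrix products, so the whole batch is fed forward and backpropagated at once. The following is only a sketch, not part of the code above; it assumes the dot, sigma and sigma_prime defined above, W and b as the same lists of arrays, and y as the one-hot target matrix built at the start of SGD.

def sgd_step_vectorized(I, y, W, b, batch, eta):
    # Activations are kept as (layer_size[l], batch_size) matrices, so one
    # matrix-matrix product per layer replaces batch_size matrix-vector products.
    batch_size = len(batch)
    a = [I[batch].T]
    z = []
    # Feedforward for the whole batch at once
    for l in range(len(W)):
        z.append(dot(W[l], a[l]) + b[l])   # b[l] broadcasts over the batch dimension
        a.append(sigma(z[l]))
    # Backpropagation for the whole batch at once
    delta = (a[-1] - y[batch].T) * sigma_prime(z[-1])
    for l in reversed(range(len(W))):
        dW = dot(delta, a[l].T)            # equals the sum of the per-sample outer products
        db = delta.sum(axis=1, keepdims=True)
        if l > 0:
            delta = dot(W[l].T, delta) * sigma_prime(z[l - 1])
        W[l] -= eta * dW / batch_size
        b[l] -= eta * db / batch_size

A call with the arrays from the listing above would look like sgd_step_vectorized(I, y, W, b, random.randint(0, len(L), batch_size), eta). Besides removing the Python-level loop overhead, the underlying BLAS routines may already use several cores for the matrix products.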