
I have a simple neural network with 2 input neurons, 3 hidden neurons, and 1 output neuron. The hidden layer has a bias.

I'm not using matrix operations to do the feed-forward and backpropagation; every weight is a separate scalar variable. When I run the training function on a simple linearly separable dataset, the error rises and the prediction results are wrong.
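
In equations, the forward pass in the code below computes, for hidden units $j = 1, 2, 3$ with a shared bias $b$ and activation $\sigma$ (the `w..._II` variables are the output layer's weights):

$$a_j = w_{j1} x_1 + w_{j2} x_2 + b, \qquad z_j = \sigma(a_j), \qquad z^{II} = \sigma\Big(\sum_{j=1}^{3} w^{II}_{1j}\, z_j\Big)$$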

import random
from math import exp, pow, tanh

def random_weight():
    return random.random()

def sigmoid(x):
    return 1.0 / (1.0 + exp(-x))

def sigmoid_drv(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

# hidden-layer (I) weights
w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
# output-layer (II) weights
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
# shared hidden-layer bias
b_I = 1
activation = sigmoid
activation_drv = sigmoid_drv

def predict(x1, x2):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    # forward pass: three hidden neurons, then one output neuron
    a1_I = w11_I * x1 + w12_I * x2 + b_I
    z1_I = activation(a1_I)
    a2_I = w21_I * x1 + w22_I * x2 + b_I
    z2_I = activation(a2_I)
    a3_I = w31_I * x1 + w32_I * x2 + b_I
    z3_I = activation(a3_I)
    a1_II = w11_II * z1_I + w12_II * z2_I + w13_II * z3_I
    z1_II = activation(a1_II)
    return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

def train(x1, x2, y, alpha):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1, x2)
    error = 0.5 * pow(y - z1_II, 2)
    delta = y - z1_II * activation_drv(a1_II)
    # output-layer updates
    w11_II += delta * z1_I * alpha
    w12_II += delta * z2_I * alpha
    w13_II += delta * z3_I * alpha
    # hidden-layer updates (chain rule through the output weights)
    w11_I += delta * w11_II * activation_drv(a1_I) * x1 * alpha
    w12_I += delta * w11_II * activation_drv(a1_I) * x2 * alpha
    w21_I += delta * w12_II * activation_drv(a2_I) * x1 * alpha
    w22_I += delta * w12_II * activation_drv(a2_I) * x2 * alpha
    w31_I += delta * w13_II * activation_drv(a3_I) * x1 * alpha
    w32_I += delta * w13_II * activation_drv(a3_I) * x2 * alpha
    b_I += (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
    return error

data = [
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
]
for i in range(0, 10):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += train(j[0], j[1], j[2], 0.01)
    print(err)
print("-" * 30)
for j in data:
    _, _, _, _, _, _, _, res = predict(j[0], j[1])
    print(j[0], ",", j[1], "=", res)

For example, the result of the code is:

0.363894453262
0.366966815948
0.366406041572
0.369982058232
0.36988850637
0.375869833099
0.378106172616
0.380456639936
0.37901554717
0.383723920259
------------------------------
(0, ',', 0, '=', 0.8439871540493414)
(0, ',', 1, '=', 0.861714406183168)
(1, ',', 0, '=', 0.8515477541104413)
(1, ',', 1, '=', 0.8676931366534011)

---------------- UPDATE ----------------

I changed the code to this:

import random
from math import exp, pow

def random_weight():
    return random.random()

def sigmoid(x):
    return 1.0 / (1.0 + exp(-x))

def sigmoid_drv(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = random_weight()
activation = sigmoid
activation_drv = sigmoid_drv

def predict(x1, x2):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I = w11_I * x1 + w12_I * x2 + b_I
    z1_I = activation(a1_I)
    a2_I = w21_I * x1 + w22_I * x2 + b_I
    z2_I = activation(a2_I)
    a3_I = w31_I * x1 + w32_I * x2 + b_I
    z3_I = activation(a3_I)
    a1_II = w11_II * z1_I + w12_II * z2_I + w13_II * z3_I
    z1_II = activation(a1_II)
    return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

def train(x1, x2, y, alpha):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1, x2)
    error = 0.5 * pow(z1_II - y, 2)
    delta = z1_II - y * activation_drv(a1_II)
    d_w11_II = delta * z1_I * alpha
    d_w12_II = delta * z2_I * alpha
    d_w13_II = delta * z3_I * alpha
    d_w11_I = delta * w11_II * activation_drv(a1_I) * x1 * alpha
    d_w12_I = delta * w11_II * activation_drv(a1_I) * x2 * alpha
    d_w21_I = delta * w12_II * activation_drv(a2_I) * x1 * alpha
    d_w22_I = delta * w12_II * activation_drv(a2_I) * x2 * alpha
    d_w31_I = delta * w13_II * activation_drv(a3_I) * x1 * alpha
    d_w32_I = delta * w13_II * activation_drv(a3_I) * x2 * alpha
    d_b_I = (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
    # gradient descent: subtract the deltas from the weights
    w11_II -= d_w11_II
    w12_II -= d_w12_II
    w13_II -= d_w13_II
    w11_I -= d_w11_I
    w12_I -= d_w12_I
    w21_I -= d_w21_I
    w22_I -= d_w22_I
    w31_I -= d_w31_I
    w32_I -= d_w32_I
    b_I -= d_b_I
    return error

data = [
    [0, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
]
for i in range(0, 10):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += train(j[0], j[1], j[2], 0.01)
    print(err)
print("-" * 30)
for j in data:
    _, _, _, _, _, _, _, res = predict(j[0], j[1])
    print(j[0], ",", j[1], "=", res)

I subtract the weight deltas from the weights now. The error of the network decreases, but the prediction is still wrong.
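
These deltas are intended to implement plain gradient descent on the squared error $E = \frac{1}{2}(z^{II}_1 - y)^2$; by the chain rule, the gradient for an output weight is

$$\frac{\partial E}{\partial w^{II}_{1j}} = (z^{II}_1 - y)\,\sigma'(a^{II}_1)\,z^{I}_j, \qquad w^{II}_{1j} \leftarrow w^{II}_{1j} - \alpha\,\frac{\partial E}{\partial w^{II}_{1j}}$$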

The result of the above code:

0.7793443881847488
0.7577581315356949
0.7432698222320477
0.7316129719356839
0.7160385688813552
0.6943522088277978
0.6862277294774705
0.6656984495700775
0.6584361784187711
0.6410006126876817
------------------------------
0 , 0 = 0.6049212721996029
0 , 1 = 0.6227402202339664
1 , 0 = 0.6139758543180651
1 , 1 = 0.6293581473456563
asked Apr 25, 2020 at 12:23
  • What is the error message? Commented Apr 25, 2020 at 12:26
  • There is no error message; the code executes correctly. The error of the neural network is not approaching zero. There is a mathematical problem in this code. Commented Apr 25, 2020 at 12:33
  • Ah sorry, I misunderstood "the error rises up". Commented Apr 25, 2020 at 12:33
  • Ok, but what were the results of your own debugging? Which parts of your code did you determine to be obviously correct and about which parts are you unsure? Commented Apr 25, 2020 at 12:36
  • The prediction is working correctly. I think the problem is in calculating the weight gradients and updating them, maybe in the chain rule. Commented Apr 25, 2020 at 12:38

2 Answers


One possible error is in the calculation of delta:

delta = z1_II-y * activation_drv(a1_II)

Add parentheses and change this to:

delta = (z1_II-y) * activation_drv(a1_II)
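
The parentheses matter because `*` binds more tightly than `-` in Python, so the original line computed `z1_II - (y * activation_drv(a1_II))` rather than the error term times the derivative. A minimal check with made-up numbers:

z1_II, y, drv = 0.8, 1.0, 0.2  # made-up values, for illustration only
wrong = z1_II - y * drv        # parsed as z1_II - (y * drv): ~0.6
right = (z1_II - y) * drv      # error times derivative: ~-0.04
print(wrong, right)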
answered Apr 25, 2020 at 14:32

  • Thank you, that was an obvious mistake! But my results are still wrong!

I found the problem: the sigmoid function was not a good fit for this network. I changed it to tanh, and the prediction results are correct now.
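
For reference, the tanh used in the final code and its derivative are

$$\tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}, \qquad \tanh'(x) = 1 - \tanh^{2}(x)$$

Unlike the sigmoid, whose outputs lie in $(0, 1)$, tanh is zero-centered with outputs in $(-1, 1)$.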

The final code:

import random
from math import exp, pow

class ANN:
    def random_weight(self):
        return random.random()

    def sigmoid(self, x):
        return 1.0 / (1.0 + exp(-x))

    def sigmoid_drv(self, x):
        return self.sigmoid(x) * (1.0 - self.sigmoid(x))

    def tanh(self, x):
        return (exp(x) - exp(-x)) / (exp(x) + exp(-x))

    def tanh_drv(self, x):
        return 1 - pow(self.tanh(x), 2)

    def __init__(self):
        # hidden-layer (I) weights
        self.w11_I = self.random_weight()
        self.w12_I = self.random_weight()
        self.w21_I = self.random_weight()
        self.w22_I = self.random_weight()
        self.w31_I = self.random_weight()
        self.w32_I = self.random_weight()
        # output-layer (II) weights
        self.w11_II = self.random_weight()
        self.w12_II = self.random_weight()
        self.w13_II = self.random_weight()
        self.b_I = self.random_weight()
        # tanh instead of sigmoid
        self.activation = self.tanh
        self.activation_drv = self.tanh_drv

    def predict(self, x1, x2):
        a1_I = self.w11_I * x1 + self.w12_I * x2 + self.b_I
        z1_I = self.activation(a1_I)
        a2_I = self.w21_I * x1 + self.w22_I * x2 + self.b_I
        z2_I = self.activation(a2_I)
        a3_I = self.w31_I * x1 + self.w32_I * x2 + self.b_I
        z3_I = self.activation(a3_I)
        a1_II = self.w11_II * z1_I + self.w12_II * z2_I + self.w13_II * z3_I
        z1_II = self.activation(a1_II)
        return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

    def train(self, x1, x2, y, alpha):
        a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = self.predict(x1, x2)
        error = 0.5 * pow(z1_II - y, 2)
        delta = (z1_II - y) * self.activation_drv(a1_II)
        d_w11_II = delta * z1_I * alpha
        d_w12_II = delta * z2_I * alpha
        d_w13_II = delta * z3_I * alpha
        d_w11_I = delta * self.w11_II * self.activation_drv(a1_I) * x1 * alpha
        d_w12_I = delta * self.w11_II * self.activation_drv(a1_I) * x2 * alpha
        d_w21_I = delta * self.w12_II * self.activation_drv(a2_I) * x1 * alpha
        d_w22_I = delta * self.w12_II * self.activation_drv(a2_I) * x2 * alpha
        d_w31_I = delta * self.w13_II * self.activation_drv(a3_I) * x1 * alpha
        d_w32_I = delta * self.w13_II * self.activation_drv(a3_I) * x2 * alpha
        d_b_I = (delta * self.w11_II * self.activation_drv(a1_I) + delta * self.w12_II * self.activation_drv(a2_I) + delta * self.w13_II * self.activation_drv(a3_I)) * alpha
        self.w11_II -= d_w11_II
        self.w12_II -= d_w12_II
        self.w13_II -= d_w13_II
        self.w11_I -= d_w11_I
        self.w12_I -= d_w12_I
        self.w21_I -= d_w21_I
        self.w22_I -= d_w22_I
        self.w31_I -= d_w31_I
        self.w32_I -= d_w32_I
        self.b_I -= d_b_I
        return error

model = ANN()
data = [
    [0, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
]
for i in range(0, 200):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += model.train(j[0], j[1], j[2], 0.1)
    print(err)
print("-" * 30)
for j in data:
    _, _, _, _, _, _, _, res = model.predict(j[0], j[1])
    print(j[0], ",", j[1], "=", res)

The result of the code:

...
0.1978539306282795
0.19794670251861882
0.19745074826953185
0.19529942727878868
0.19779970636626873
0.19661596298810918
------------------------------
0 , 0 = -0.24217968147818447
0 , 1 = 0.236033934015224
1 , 0 = 0.24457439328909888
1 , 1 = 0.5919949310028919
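
The raw tanh outputs are real values rather than hard class labels. As a hypothetical post-processing step (not part of the original answer), one could threshold them to recover the labels of this dataset:

for j in data:
    _, _, _, _, _, _, _, res = model.predict(j[0], j[1])
    label = 1 if res > 0.5 else 0  # hypothetical cutoff; separates 0.59 from <= 0.25 in the run above
    print(j[0], ",", j[1], "=", label)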
answered Apr 25, 2020 at 15:00
