@@ -41,7 +41,7 @@
 DEFAULT_ACTIVATION = 'logistic' # Or 'tanh' for the tanh activation function.
 LEARNING_RATE = 0.1 # Default learning rate.
 ITERATION = 5000
-HIDDEN_LAYER_SIZES = [100, 10] #Hidden layer structure. The definition of [100, 10] is for multiple hidden layers, first layer with 100 neurals, and second hidden layer with 10 neurals,
+HIDDEN_LAYER_SIZES = [100, 100, 50] # Hidden layer structure. [100, 100, 50] defines three hidden layers: 100 neurons in the first, 100 in the second, and 50 in the third.
 LOWER_BOUND_INIT_WEIGHT = 0 # Lower bound of the initial weight for each connection.
 UPPER_BOUND_INIT_WEIGHT = 1 # Upper bound of the initial weight for each connection.
 BINARY_CLASSIFICATION = True # If True, outputs are thresholded to 0 or 1; if False, the raw output value is returned.
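For orientation, this is roughly how the constants above would feed the network object used later in the file (`nn.execute(images, labels)`). The constructor call below is a hedged sketch only: the class name and keyword names are assumptions, chosen to mirror the attributes the methods actually read (`self.learning_rate`, `self.hidden_layer_sizes`, `self.weight_low`, `self.weight_high`, `self.enable_binary_classification`); the real constructor is outside this diff.

```python
# Hypothetical wiring sketch only; the actual constructor is not shown in this diff.
nn = NeuralNetwork(
    activation=DEFAULT_ACTIVATION,              # 'logistic' or 'tanh'
    learning_rate=LEARNING_RATE,                # gradient step size
    iterations=ITERATION,                       # training passes
    hidden_layer_sizes=HIDDEN_LAYER_SIZES,      # [100, 100, 50] after this change
    weight_low=LOWER_BOUND_INIT_WEIGHT,         # init-weight lower bound
    weight_high=UPPER_BOUND_INIT_WEIGHT,        # init-weight upper bound
    enable_binary_classification=BINARY_CLASSIFICATION,
)
```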
@@ -160,112 +160,111 @@ def initial_weights(self, network_layer_sizes):
         # self.weights = weights array = [[Weights of level-01], [Weights of level-12], ..., [Weights of level-(L-1)(L)]].
         # For [Weights of level-01] = [[w01, w02, ..., w0d], [w11, w12, ..., w1d], ..., [wd1, wd2, ..., wdd]]

-        _weights = []
-        _scale = 0 # optimal scale of weight is m**(-1/2)
+        weights = []
+        scale = 0 # The optimal scale of a weight is m**(-1/2), where m is the layer's fan-in.
         for l in range(1, len(network_layer_sizes)):
-            _scale = (network_layer_sizes[l-1])**(-1/2)
-            _weights.append(((self.weight_high)-(self.weight_low))*np.random.normal(size=(network_layer_sizes[l-1], network_layer_sizes[l]))+(self.weight_low))
-#           _weights.append(((self.weight_high)-(self.weight_low))*np.random.normal(scale=_scale, size=(network_layer_sizes[l-1], network_layer_sizes[l]))+(self.weight_low))
+            scale = (network_layer_sizes[l-1])**(-1/2)
+            weights.append(((self.weight_high)-(self.weight_low))*np.random.normal(size=(network_layer_sizes[l-1], network_layer_sizes[l]))+(self.weight_low))
+#           weights.append(((self.weight_high)-(self.weight_low))*np.random.normal(scale=scale, size=(network_layer_sizes[l-1], network_layer_sizes[l]))+(self.weight_low))
         np.random.random
-        self.weights = _weights
+        self.weights = weights
         return self.weights

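As a standalone sketch of the initialization loop above: one weight matrix is drawn per adjacent layer pair, stretched into the [weight_low, weight_high) range, and the commented-out variant would instead scale the normal draw by m**(-1/2), where m is the previous layer's size. The function name and defaults below are illustrative, not the module's API.

```python
import numpy as np

def init_weights(layer_sizes, low=0.0, high=1.0, scaled=False):
    """Sketch of the per-layer weight initialization shown in the diff.

    With scaled=True it applies the commented-out variant: a normal draw
    with standard deviation m**(-1/2), where m is the previous layer's size.
    """
    weights = []
    for l in range(1, len(layer_sizes)):
        fan_in, fan_out = layer_sizes[l - 1], layer_sizes[l]
        scale = fan_in ** -0.5 if scaled else 1.0
        w = (high - low) * np.random.normal(scale=scale, size=(fan_in, fan_out)) + low
        weights.append(w)
    return weights

# Example: weight shapes for a [785, 100, 100, 50, 10] network.
ws = init_weights([785, 100, 100, 50, 10])
print([w.shape for w in ws])  # [(785, 100), (100, 100), (100, 50), (50, 10)]
```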
     def set_layer_sizes(self, training_data, training_data_label):
         # Construct the whole neural-network structure: [input layer size, hidden layer 1 size, ..., hidden layer L size, output layer size].
-        _dim = 0
-        _network_layer_sizes = []
-        _dim = training_data.ndim;
-        if _dim != 0:
+        dim = 0
+        network_layer_sizes = []
+        dim = training_data.ndim
+        if dim != 0:
             self.input_numbers, self.input_dimensions = training_data.shape
         else:
             pass
-        _dim = training_data_label.ndim;
-        if _dim !=0:
-            if _dim == 1:
+        dim = training_data_label.ndim
+        if dim != 0:
+            if dim == 1:
                 self.output_numbers = training_data_label.shape[0]
                 self.output_dimensions = 1
             else:
                 self.output_numbers, self.output_dimensions = training_data_label.shape
         else:
             pass

-        _network_layer_sizes.append(self.input_dimensions+1) # add X0
+        network_layer_sizes.append(self.input_dimensions+1) # add the bias unit X0

         for i in self.hidden_layer_sizes:
-            _network_layer_sizes.append(i)
+            network_layer_sizes.append(i)

-        _network_layer_sizes.append(self.output_dimensions)
-        self.network_layer_sizes = np.array(_network_layer_sizes)
+        network_layer_sizes.append(self.output_dimensions)
+        self.network_layer_sizes = np.array(network_layer_sizes)

         return self.network_layer_sizes

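A quick worked example of what set_layer_sizes produces, assuming MNIST-style data (the shapes here are hypothetical): the 784 input features gain a bias unit, the three hidden layers follow, and the label width becomes the output layer size.

```python
import numpy as np

# Hypothetical shapes: 60000 flattened 28x28 images, one-hot labels.
training_data = np.zeros((60000, 784))
training_data_label = np.zeros((60000, 10))

input_dimensions = training_data.shape[1]         # 784
output_dimensions = training_data_label.shape[1]  # 10
hidden_layer_sizes = [100, 100, 50]

network_layer_sizes = [input_dimensions + 1]      # +1 for the bias unit X0
network_layer_sizes += hidden_layer_sizes
network_layer_sizes.append(output_dimensions)
print(network_layer_sizes)  # [785, 100, 100, 50, 10]
```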
     def feed_forward(self, input_data):
-        _X = [np.concatenate((np.ones(1).T, np.array(input_data)), axis=0)] #add bias unit [array([])]
-        _network_layer_sizes = self.network_layer_sizes
-        _W = self.weights
-        _wijxi = []
-        _xj = []
+        X = [np.concatenate((np.ones(1).T, np.array(input_data)), axis=0)] # prepend the bias unit x0
+        W = self.weights
+        wijxi = []
+        xj = []

-        for l in range(0, len(_W)):
-            _wijxi = np.dot(_X[l], _W[l])
-            _xj = self.activation(_wijxi)
+        for l in range(0, len(W)):
+            wijxi = np.dot(X[l], W[l])
+            xj = self.activation(wijxi)
             # Set up the bias term for each hidden layer, x0 = 1.
-            if l < len(_W)-1:
-                _xj[0] = 1
-            _X.append(_xj)
+            if l < len(W)-1:
+                xj[0] = 1
+            X.append(xj)

-        self.X = _X
-        return _X[-1] #return the feed forward result of final level.
+        self.X = X
+        return X[-1] # Return the feed-forward result of the final layer.

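A self-contained sketch of the same forward pass with the logistic activation. As in the loop above, slot 0 of every hidden layer is overwritten with 1 so the bias unit propagates; only the output layer keeps all of its activations. Names here are illustrative.

```python
import numpy as np

def logistic(z):
    return 1.0 / (1.0 + np.exp(-z))

def feed_forward(weights, input_data):
    # Prepend the bias unit x0 = 1 to the raw input vector.
    X = [np.concatenate((np.ones(1), np.array(input_data)))]
    for l, W in enumerate(weights):
        xj = logistic(X[l].dot(W))
        if l < len(weights) - 1:
            xj[0] = 1  # reset the bias slot of each hidden layer
        X.append(xj)
    return X[-1]  # activation of the output layer

# Tiny example: 2 inputs (+ bias), one hidden layer of 3, one output.
rng = np.random.default_rng(0)
ws = [rng.normal(size=(3, 3)), rng.normal(size=(3, 1))]
print(feed_forward(ws, [0.5, -0.2]))
```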
     def back_propagate(self, output, label_data):
         X = self.X
         W = list(self.weights) # self.weights = <class list>[array([ndarray[100], ndarray[100], ... x961]), array(ndarray[1], ndarray[1], ... x100)]
         avg_err = []
         Delta = []
-        _x = []
-        _d = []
-        _w = []
-        _y = []
+        x = []
+        d = []
+        w = []
+        y = []

-        _y = np.atleast_2d(label_data)
-        _x = np.atleast_2d(output)
+        y = np.atleast_2d(label_data)
+        x = np.atleast_2d(output)
         # Base case: delta of the output layer L.
-        avg_err = np.average(_x - _y)
-        Delta = [self.error_term_derivation(_x, _y) * self.activation_derivation(_x)] # Delta = error term derivation * activation function derivation
+        avg_err = np.average(x - y)
+        Delta = [self.error_term_derivation(x, y) * self.activation_derivation(x)] # delta = error-term derivative * activation-function derivative
         # <class list>[array([])]

         # Calculate all remaining deltas and adjust the weights.
         for l in range(len(X)-2, 0, -1):
-            _d = np.atleast_2d(Delta[-1])
-            _x = np.atleast_2d(X[l])
-            _w = np.array(W[l])
+            d = np.atleast_2d(Delta[-1])
+            x = np.atleast_2d(X[l])
+            w = np.array(W[l])

-            Delta.append( self.activation_derivation(_x) * Delta[-1].dot(_w.T) )
-            W[l] -= self.learning_rate * _x.T.dot(_d)
+            Delta.append(self.activation_derivation(x) * Delta[-1].dot(w.T))
+            W[l] -= self.learning_rate * x.T.dot(d)

         # Update the input layer's outgoing weights with the last delta.
-        _x = np.atleast_2d(X[l-1])
-        _d = np.atleast_2d(Delta[-1])
-        W[l-1] -= self.learning_rate * _x.T.dot(_d)
+        x = np.atleast_2d(X[l-1])
+        d = np.atleast_2d(Delta[-1])
+        W[l-1] -= self.learning_rate * x.T.dot(d)

         self.weights = W
         return avg_err

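The recursion above as a standalone sketch, assuming squared error so that error_term_derivation(x, y) reduces to (x - y), and the logistic derivative written in terms of the activation value, f'(x) = x(1 - x). Function and argument names are illustrative; pairing this with the feed_forward sketch above gives one full training step.

```python
import numpy as np

def logistic_derivative(x):
    # Derivative of the logistic function, expressed via its output value.
    return x * (1.0 - x)

def back_propagate(weights, X, output, label, learning_rate=0.1):
    """Sketch of the delta recursion in the diff, for squared error.

    X is the list of per-layer activations saved by the forward pass.
    """
    y = np.atleast_2d(label)
    x = np.atleast_2d(output)
    deltas = [(x - y) * logistic_derivative(x)]    # output-layer delta
    for l in range(len(X) - 2, 0, -1):             # hidden layers, back to front
        d = np.atleast_2d(deltas[-1])
        x = np.atleast_2d(X[l])
        w = weights[l]
        deltas.append(logistic_derivative(x) * d.dot(w.T))
        weights[l] -= learning_rate * x.T.dot(d)   # gradient step
    # Finally, the weights feeding out of the input layer.
    x = np.atleast_2d(X[0])
    d = np.atleast_2d(deltas[-1])
    weights[0] -= learning_rate * x.T.dot(d)
    return weights
```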
     def predict(self, x):
-        _r = []
-        _r = self.feed_forward(x[0])
-        _enable_binary_classification = self.enable_binary_classification
+        r = []
+        r = self.feed_forward(x[0])
+        enable_binary_classification = self.enable_binary_classification

         # Apply binary classification to the prediction results.
-        if _enable_binary_classification and self.activation == self.logistic:
-            for i in range(len(_r)):
-                if _r[i] >= THRESHOLD:
-                    _r[i] = 1
+        if enable_binary_classification and self.activation == self.logistic:
+            for i in range(len(r)):
+                if r[i] >= THRESHOLD:
+                    r[i] = 1
                 else:
-                    _r[i] = 0
+                    r[i] = 0
         else:
             pass
-        return _r
+        return r

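The thresholding loop can equivalently be written as one vectorized step; the THRESHOLD value below is illustrative, since the module defines its own elsewhere.

```python
import numpy as np

THRESHOLD = 0.5  # illustrative; the module defines its own THRESHOLD

r = np.array([0.12, 0.93, 0.50, 0.49])
binary = np.where(r >= THRESHOLD, 1, 0)
print(binary)  # [0 1 1 0]
```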
     def execute(self, training_data, training_data_label):
         '''
@@ -328,8 +327,8 @@ def execute(self, training_data, training_data_label):
 nn.execute(images, labels)
 total = 0
 correct = 0
-_dim = np.array(labels).ndim;
-if _dim == 1:
+dim = np.array(labels).ndim
+if dim == 1:
     threshold_array = np.array(THRESHOLD)
 else:
     threshold_array = np.array(THRESHOLD)*np.array(labels).shape[1]
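For context, a hedged sketch of how total and correct presumably accumulate into an accuracy figure. The evaluation loop itself is outside this diff, so the comparison rule (here: exact match of the thresholded prediction against the label row) is an assumption.

```python
import numpy as np

def accuracy(predictions, labels):
    # Fraction of samples whose thresholded prediction matches the label exactly.
    total = len(labels)
    correct = sum(int(np.array_equal(p, t)) for p, t in zip(predictions, labels))
    return correct / total

preds = [np.array([0, 1]), np.array([1, 0]), np.array([1, 1])]
labels = [np.array([0, 1]), np.array([1, 1]), np.array([1, 1])]
print(accuracy(preds, labels))  # ~0.67
```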