
This is a follow-up to the code found here:

Multithreaded HD Image Processing + Logistic reg. Classifier + Visualization

Description:

This code takes a target label and a path to a folder of sub-folders as input, where each sub-folder is named after a label (e.g. trees, cats) and contains HD photos corresponding to that name. A multithreaded image processor converts the data to .h5 format, and the photos are then classified against the given label using logistic regression or a neural network.
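
For reference, a minimal sketch of the folder layout the class expects (the folder and file names here are hypothetical):

test_photos/
├── Dog/
│   ├── dog_001.jpg
│   └── ...
└── Cat/
    ├── cat_001.jpg
    └── ...

Each sub-folder name doubles as its label, and the generated Dog.h5/Cat.h5 files are written inside the corresponding sub-folders.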

The following link has the code below as well as the necessary .h5 files to make it work, demonstrating an example of classifying dog photos. Feel free to test with your own image data; I'm awaiting your feedback on optimizations, improvements, and reducing runtime.

Updates:

  • Added a neural network implementation based on Andrew Ng's Deep Learning course
  • Wrapped everything into a class ImageClassifier
  • Added some logging and more error checks.
  • Better organization of the code.
  • Added more training examples

Running the code with random_seed=151 should give the following results:

  • Initial sample:

[figure: initial data sample]

  • Sample of the results with 72% accuracy on the test set:

[figure: results sample]

  • Learning curve (neural net), max_iter=7000, learning_rate=0.015:

[figure: learning curve]

Code:

from concurrent.futures import ThreadPoolExecutor, as_completed
import matplotlib.pyplot as plt
from time import perf_counter
import pandas as pd
import numpy as np
import random
import shutil
import cv2
import os


class ImageClassifier:
    """
    A tool for classifying and labeling HD images.
    """

    def __init__(
        self,
        folder_path,
        target_label,
        new_image_size=(80, 80),
        threads=5,
        fig_size=(9, 9),
        random_seed=None,
        save_figs=False,
        norm_value=255,
        sample_dimensions=(2, 5),
        display_progress=True,
        log_file=None,
        layer_dimensions=(20, 7, 5, 1),
        initial_parameter_mode='he',
        learning_rate=0.0005,
        max_iter=2000,
        display_nth_iteration=100,
        test_size=0.2,
        display_figs=None,
        new_hdf=False,
        show_figs=True,
    ):
        """
        Initialize classification of a given folder.
        Args:
            folder_path: Path to folder containing labeled sub-folders of images.
            target_label: Name of one of the sub-folders (the label to classify).
            new_image_size: New dimensions to which images are resized.
            threads: Number of image processing threads.
            fig_size: Display figure dimensions.
            random_seed: int representing a random seed.
            save_figs: If True, all specified figures will be saved.
            norm_value: Normalization value; 255 is the maximum value of a pixel channel.
            sample_dimensions: (n_rows, n_columns) dimensions of sample display subplots.
            display_progress: If False, progress and results of the classification will not be displayed.
            log_file: Path to a .txt file for logging the classification progress and results.
            layer_dimensions: Neural network hidden layer dimensions.
            initial_parameter_mode: Model parameter initialization mode:
                'he': He initialization.
                'sq': Square root.
                'z': Zeros.
            learning_rate: The learning rate of the classifier.
            max_iter: Maximum number of iterations for the optimization algorithm (gradient descent).
            display_nth_iteration: Display the current cost every n iterations.
            test_size: Fraction of the data used as the test set.
            display_figs: A list of figures to plot.
                'i': If in list, the initial sample will be displayed.
                'lc': If in list, the learning curve will be displayed.
                'r': If in list, a sample of correct results will be displayed.
                'e': If in list, a sample of misclassified results will be displayed.
            new_hdf: If True, a new .hdf file will be created for every sub-folder.
            show_figs: If False, plotted figures will not be displayed.
        """
        assert os.path.isdir(folder_path)
        self.folder_path = folder_path
        if target_label not in os.listdir(folder_path):
            raise ValueError(
                f'Invalid label: {target_label} Folder not found in {folder_path}'
            )
        self.target_label = target_label
        self.new_image_size = new_image_size
        self.threads = threads
        self.fig_size = fig_size
        if random_seed is not None:
            np.random.seed(random_seed)
        self.random_seed = random_seed
        self.save_figs = save_figs
        self.norm_value = norm_value
        self.sample_dimensions = sample_dimensions
        self.display_progress = display_progress
        self.log_file = log_file
        self.layer_dimensions = layer_dimensions
        self.initial_parameter_mode = initial_parameter_mode
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        assert isinstance(display_nth_iteration, int)
        self.display_nth_iteration = display_nth_iteration
        assert 0 <= test_size <= 1
        self.test_size = test_size
        self.display_figs = display_figs
        self.new_hdf = new_hdf
        self.flat_dimensions = []
        self.show_figs = show_figs

    def display_message(self, message):
        """
        Print a message to the console and log it to a .txt file.
        Args:
            message: Message to be printed.
        Return:
            None
        """
        if self.display_progress:
            print(message)
        if self.log_file:
            # Use a context manager so the log file handle is closed after each write.
            with open(self.log_file, 'a') as log:
                print(message, file=log)

    def read_and_resize(self, image):
        """
        Read and resize an image.
        Args:
            image: Image path.
        Return:
            Resized image, or None if the file could not be read.
        """
        try:
            image = cv2.imread(image)
            return cv2.resize(image, self.new_image_size)
        except cv2.error:
            # cv2.imread returns None for unreadable files, which makes
            # cv2.resize raise cv2.error; such entries are dropped later.
            return None

    def folder_to_hdf(self, folder_path):
        """
        Save a folder's images to hdf format.
        Args:
            folder_path: Path to folder containing images.
        Return:
            None
        """
        label = folder_path.split('/')[-2]
        data, resized = pd.DataFrame(), []
        with ThreadPoolExecutor(max_workers=self.threads) as executor:
            future_resized_images = {
                executor.submit(self.read_and_resize, folder_path + img): img
                for img in os.listdir(folder_path)
                if img and img != '.DS_Store'
            }
            for future in as_completed(future_resized_images):
                result = future.result()
                resized.append(result)
                self.display_message(
                    f'Processing ({label})-{future_resized_images[future]} ... done.'
                )
                del future_resized_images[future]
        data['Images'] = resized
        data['Label'] = label
        data.to_hdf(folder_path + label + '.h5', label)

    def folders_to_hdf(self, sub_folders=None):
        """
        Convert image data (for every sub-folder of self.folder_path) to .hdf format.
        Args:
            sub_folders: A list of sub-folder names to convert.
        Return:
            None
        """
        if not sub_folders:
            for folder_name in os.listdir(self.folder_path):
                if folder_name != '.DS_Store':
                    path = ''.join([self.folder_path, folder_name, '/'])
                    self.folder_to_hdf(path)
        else:
            for folder_name in sub_folders:
                path = ''.join([self.folder_path, folder_name, '/'])
                if folder_name not in os.listdir(self.folder_path):
                    raise FileNotFoundError(f'Folder {path}')
                self.folder_to_hdf(path)

    def clear_hdf(self, sub_folders=None):
        """
        Delete the .h5 file of every sub-folder in self.folder_path.
        Args:
            sub_folders: A list of sub-folder names to clear of .hdf files.
        Return:
            None
        """
        if not sub_folders:
            for folder_name in os.listdir(self.folder_path):
                if folder_name != '.DS_Store':
                    try:
                        file_name = ''.join(
                            [self.folder_path, folder_name, '/', folder_name, '.h5']
                        )
                        os.remove(file_name)
                        self.display_message(f'Removed {file_name.split("/")[-1]}')
                    except FileNotFoundError:
                        pass
        else:
            for folder_name in sub_folders:
                file_name = ''.join(
                    [self.folder_path, folder_name, '/', folder_name, '.h5']
                )
                if folder_name not in os.listdir(self.folder_path):
                    raise FileNotFoundError(f'File not found {file_name}')
                try:
                    os.remove(file_name)
                    self.display_message(f'Removed {file_name.split("/")[-1]}')
                except FileNotFoundError:
                    pass

    def load_hdf(self, sub_folders=None):
        """
        Load classification data from .h5 files.
        Args:
            sub_folders: Sub-folders to load.
        Return:
            (image_data, labels, frames).
        """
        if not sub_folders:
            file_names = [
                ''.join([self.folder_path, folder_name, '/', folder_name, '.h5'])
                for folder_name in os.listdir(self.folder_path)
                if folder_name != '.DS_Store'
            ]
        else:
            for folder_name in sub_folders:
                path = ''.join([self.folder_path, folder_name, '/', folder_name, '.h5'])
                if folder_name not in os.listdir(self.folder_path):
                    raise FileNotFoundError(f'File not found {path}')
            file_names = [
                ''.join([self.folder_path, folder_name, '/', folder_name, '.h5'])
                for folder_name in sub_folders
            ]
        frames = [pd.read_hdf(file_name) for file_name in file_names]
        frames = pd.concat(frames).dropna()
        frames['Classification'] = 0
        frames.loc[frames['Label'] == self.target_label, 'Classification'] = 1
        # Shuffle the combined frame by assigning a random permutation of the
        # index and sorting on it.
        new_index = np.random.permutation(frames.index)
        frames.index = new_index
        frames.sort_index(inplace=True)
        image_data, labels = (
            np.array(list(frames['Images'])),
            np.array(list(frames['Classification'])),
        )
        return image_data, labels, frames

    def display_sample_images(self, title, image_data, labels=None):
        """
        Plot and display a sample of size self.sample_dimensions.
        Args:
            title: Title of the sample figure.
            image_data: numpy array of image data.
            labels: numpy array of label data (0s and 1s).
        Return:
            None
        """
        if not self.show_figs and not self.save_figs:
            return
        rows, columns = self.sample_dimensions
        fig = plt.figure(figsize=self.fig_size)
        plt.title(title)
        for i in range(rows * columns):
            img = image_data[i]
            ax = fig.add_subplot(rows, columns, i + 1)
            if isinstance(labels, np.ndarray):
                ax.title.set_text(f'Prediction: {labels[i]}')
            plt.imshow(img)
        if self.save_figs:
            plt.savefig(title + '.png')
        if self.show_figs:
            plt.show()

    def pre_process(self, sub_folders=None):
        """
        Split the data into train and test sets and prepare it for further processing.
        Args:
            sub_folders: Sub-folders to load.
        Return:
            x_train, y_train, x_test, y_test, frames.
        """
        image_data, labels, frames = self.load_hdf(sub_folders)
        rows, columns = self.sample_dimensions
        total_images = len(image_data)
        title = (
            f'Initial(before prediction) {rows} x {columns} '
            f'data sample (Classification of {self.target_label})'
        )
        if self.display_figs and 'i' in self.display_figs:
            self.display_sample_images(title, image_data)
        image_data = image_data.reshape(total_images, -1) / self.norm_value
        labels = labels.reshape(total_images, -1)
        separation_index = int(self.test_size * total_images)
        # The first separation_index examples form the test set; the rest train.
        x_train = image_data[separation_index:].T
        y_train = labels[separation_index:].T
        x_test = image_data[:separation_index].T
        y_test = labels[:separation_index].T
        self.flat_dimensions.append(x_train.shape[0])
        self.display_message(f'Total number of images: {total_images}')
        self.display_message(f'x_train shape: {x_train.shape}')
        self.display_message(f'y_train shape: {y_train.shape}')
        self.display_message(f'x_test shape: {x_test.shape}')
        self.display_message(f'y_test shape: {y_test.shape}')
        return x_train, y_train, x_test, y_test, frames

    def initialize_parameters(self):
        """
        Initialize weights and biases for the configured neural network layer dimensions.
        Return:
            A dictionary containing parameters [w1, w2, ... wn] and [b1, b2, ... bn].
        """
        parameters = {}
        self.flat_dimensions.extend(self.layer_dimensions)
        layer_dimensions = self.flat_dimensions
        for n in range(1, len(layer_dimensions)):
            if self.initial_parameter_mode == 'he':
                parameters['w' + str(n)] = np.random.randn(
                    layer_dimensions[n], layer_dimensions[n - 1]
                ) * np.sqrt(2.0 / layer_dimensions[n - 1])
            elif self.initial_parameter_mode == 'sq':
                parameters['w' + str(n)] = np.random.randn(
                    layer_dimensions[n], layer_dimensions[n - 1]
                ) / np.sqrt(layer_dimensions[n - 1])
            elif self.initial_parameter_mode == 'z':
                parameters['w' + str(n)] = np.zeros(
                    (layer_dimensions[n], layer_dimensions[n - 1])
                )
            else:
                raise ValueError(
                    f'Invalid initial_parameter_mode: {self.initial_parameter_mode}'
                )
            parameters['b' + str(n)] = np.zeros((layer_dimensions[n], 1))
        return parameters

    @staticmethod
    def sigmoid(x):
        """
        Calculate the sigmoid function (logistic regression).
        Args:
            x: Linear output, i.e. w.T x + b.
        Return:
            sigmoid(x).
        """
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def sigmoid_nn(x):
        """
        Apply the sigmoid function (neural network).
        Args:
            x: numpy array of inputs.
        Return:
            a, x
            a: Output of sigmoid.
        """
        return 1 / (1 + np.exp(-x)), x

    @staticmethod
    def sigmoid_back(da, z):
        """
        Back-propagate a single sigmoid unit (neural network).
        Args:
            da: Post-activation gradient.
            z: The output of the linear layer.
        Return:
            dz: Gradient of the cost with respect to z.
        """
        sig = 1 / (1 + np.exp(-z))
        dz = da * sig * (1 - sig)
        return dz

    @staticmethod
    def relu(z):
        """
        Apply the RELU function (neural network).
        Args:
            z: The output of the linear layer.
        Return:
            a, z
            a: Output of RELU.
        """
        return np.maximum(0, z), z

    @staticmethod
    def relu_back(da, z):
        """
        Back-propagate a single RELU unit (neural network).
        Args:
            da: Post-activation gradient.
            z: The output of the linear layer.
        Return:
            dz: Gradient of the cost with respect to z.
        """
        dz = np.array(da, copy=True)
        dz[z <= 0] = 0
        return dz

    def compute_cost(self, w, b, x, y):
        """
        Compute the cost function using forward and back propagation (logistic regression).
        Args:
            w: numpy array of weights (also called Theta).
            b: Bias (int).
            x: Image data in the following shape: (pixels * pixels * 3, number of images).
            y: numpy array of labels (0s and 1s).
        Return:
            Cost and gradients (dw and db).
        """
        total_images = x.shape[1]
        activation = self.sigmoid(np.dot(w.T, x) + b)
        cost = (-1 / total_images) * (
            np.sum(y * np.log(activation) + (1 - y) * np.log(1 - activation))
        )
        cost = np.squeeze(cost)
        dw = (1 / total_images) * np.dot(x, (activation - y).T)
        db = (1 / total_images) * np.sum(activation - y)
        return cost, dw, db

    @staticmethod
    def compute_cost_nn(last_activation, y):
        """
        Compute the cost function for the neural network using forward propagation.
        Args:
            last_activation: The last post-activation value.
            y: numpy array of labels.
        Return:
            Cross-entropy cost.
        """
        cost = -(
            y * np.log(last_activation) + (1 - y) * np.log(1 - last_activation)
        ).mean()
        return np.squeeze(cost)

    @staticmethod
    def linear_forward(a, w, b):
        """
        Apply the linear part of a single layer's forward propagation.
        Args:
            a: Activations from the previous layer (or the inputs x).
            w: numpy array of weights.
            b: numpy array of biases.
        Return:
            z, a, w, b
            z: The pre-activation parameter.
        """
        z = np.dot(w, a) + b
        return z, a, w, b

    def linear_activation_forward(self, a_prev, a_func, w, b):
        """
        Apply forward propagation for the LINEAR->ACTIVATION layer.
        Args:
            a_prev: Activations from the previous layer (or the inputs x).
            a_func: Activation function: 's' for sigmoid or 'r' for RELU.
            w: numpy array of weights.
            b: numpy array of biases.
        Return:
            post_a, linear_cache, activation_cache.
        """
        linear_cache, activation_cache, post_a = 0, 0, 0
        if a_func == 's':
            z, *linear_cache = self.linear_forward(a_prev, w, b)
            post_a, activation_cache = self.sigmoid_nn(z)
        if a_func == 'r':
            z, *linear_cache = self.linear_forward(a_prev, w, b)
            post_a, activation_cache = self.relu(z)
        return post_a, linear_cache, activation_cache

    def forward_prop(self, x, parameters):
        """
        Apply forward propagation for the [LINEAR -> RELU] * (L-1) -> LINEAR -> SIGMOID computation.
        Args:
            x: Image data in the following shape: (pixels * pixels * 3, number of images).
            parameters: A dictionary of initial parameters.
        Return:
            last_activation, caches.
            last_activation: The last post-activation value.
            caches: A list containing every cache of linear_activation_forward().
        """
        caches = []
        activation = x
        layers = len(parameters) // 2
        for layer in range(1, layers):
            a_prev = activation
            current_weights = parameters['w' + str(layer)]
            current_bias = parameters['b' + str(layer)]
            activation, *cache = self.linear_activation_forward(
                a_prev, 'r', current_weights, current_bias
            )
            caches.append(cache)
        last_weight = parameters['w' + str(layers)]
        last_bias = parameters['b' + str(layers)]
        last_activation, *cache = self.linear_activation_forward(
            activation, 's', last_weight, last_bias
        )
        caches.append(cache)
        return last_activation, caches

    @staticmethod
    def linear_back(dz, cache):
        """
        Apply the linear part of a single neural network layer's back propagation.
        Args:
            dz: Gradient of the cost with respect to the linear output of the current layer.
            cache: A tuple containing the current layer's forward propagation cache (a_prev, w, b).
        Return:
            da_prev, dw, db
            da_prev: Gradient of the cost with respect to the activation (of the previous layer).
            dw: Gradient of the cost with respect to w.
            db: Gradient of the cost with respect to b.
        """
        a_prev, w, b = cache
        m = a_prev.shape[1]
        dw = np.dot(dz, a_prev.T) / m
        db = np.sum(dz, axis=1, keepdims=True) / m
        da_prev = np.dot(w.T, dz)
        return da_prev, dw, db

    def linear_activation_back(self, a_func, da, cache):
        """
        Apply back propagation for the LINEAR->ACTIVATION layer.
        Args:
            a_func: Activation function: 's' for sigmoid or 'r' for RELU.
            da: Post-activation gradient for the current layer.
            cache: linear_cache, activation_cache from linear_activation_forward().
        Return:
            da_prev, dw, db
            da_prev: Gradient of the cost with respect to the activation (of the previous layer).
            dw: Gradient of the cost with respect to w.
            db: Gradient of the cost with respect to b.
        """
        da_prev, dw, db = 0, 0, 0
        linear_cache, activation_cache = cache
        if a_func == 's':
            dz = self.sigmoid_back(da, activation_cache)
            da_prev, dw, db = self.linear_back(dz, linear_cache)
        if a_func == 'r':
            dz = self.relu_back(da, activation_cache)
            da_prev, dw, db = self.linear_back(dz, linear_cache)
        return da_prev, dw, db

    def back_prop(self, last_activation, y, caches):
        """
        Apply backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group.
        Args:
            last_activation: The last post-activation value.
            y: numpy array of labels.
            caches: A list containing every cache of linear_activation_forward().
        Return:
            A dictionary of gradients.
        """
        grads = {}
        layers = len(caches)
        y = y.reshape(last_activation.shape)
        # Derivative of the cross-entropy cost with respect to the last activation.
        d_last_activation = -(
            np.divide(y, last_activation) - np.divide(1 - y, 1 - last_activation)
        )
        current_cache = caches[layers - 1]
        (
            grads['da' + str(layers)],
            grads['dw' + str(layers)],
            grads['db' + str(layers)],
        ) = self.linear_activation_back('s', d_last_activation, current_cache)
        for layer in reversed(range(layers - 1)):
            # grads['da' + str(layer + 2)] holds the post-activation gradient
            # produced by the layer above (one index higher than this layer's).
            current_cache = caches[layer]
            da_prev_temp, dw_temp, db_temp = self.linear_activation_back(
                'r', grads['da' + str(layer + 2)], current_cache
            )
            grads['da' + str(layer + 1)] = da_prev_temp
            grads['dw' + str(layer + 1)] = dw_temp
            grads['db' + str(layer + 1)] = db_temp
        return grads

    @staticmethod
    def update_params(parameters, grads, learning_rate):
        """
        Update parameters for gradient descent (neural network).
        Args:
            parameters: A dictionary of initial parameters.
            grads: A dictionary of gradients.
            learning_rate: The learning rate of gradient descent.
        Return:
            Updated parameters.
        """
        layers = len(parameters) // 2
        for layer in range(layers):
            weight_decrease = learning_rate * grads['dw' + str(layer + 1)]
            parameters['w' + str(layer + 1)] -= weight_decrease
            bias_decrease = learning_rate * grads['db' + str(layer + 1)]
            parameters['b' + str(layer + 1)] -= bias_decrease
        return parameters

    def g_descent(self, w, b, x, y):
        """
        Optimize weights and bias using the gradient descent algorithm (logistic regression).
        Args:
            w: numpy array of weights (also called Theta).
            b: Bias (int).
            x: Image data in the following shape: (pixels * pixels * 3, number of images).
            y: numpy array of labels (0s and 1s).
        Return:
            w, b, dw, db, costs
        """
        dw, db, costs = 0, 0, []
        for iteration in range(self.max_iter):
            cost, dw, db = self.compute_cost(w, b, x, y)
            w -= dw * self.learning_rate
            b -= db * self.learning_rate
            costs.append(cost)
            if iteration % self.display_nth_iteration == 0:
                self.display_message(
                    f'Iteration number: {iteration} out of {self.max_iter} iterations'
                )
                self.display_message(f'Current cost: {cost}\n')
        return w, b, dw, db, costs

    def g_descent_nn(self, x, y, parameters):
        """
        Optimize weights and biases using the gradient descent algorithm (neural network).
        Args:
            x: Image data in the following shape: (pixels * pixels * 3, number of images).
            y: numpy array of labels (0s and 1s).
            parameters: A dictionary of initial parameters (weights/biases).
        Return:
            parameters, costs
        """
        costs = []
        for iteration in range(self.max_iter):
            last_activation, caches = self.forward_prop(x, parameters)
            current_cost = self.compute_cost_nn(last_activation, y)
            costs.append(current_cost)
            grads = self.back_prop(last_activation, y, caches)
            parameters = self.update_params(parameters, grads, self.learning_rate)
            if iteration % self.display_nth_iteration == 0:
                self.display_message(
                    f'Iteration number: {iteration} out of {self.max_iter} iterations'
                )
                self.display_message(f'Current cost: {current_cost}\n')
        return parameters, costs

    def predict(self, w, b, x):
        """
        Predict labels of x (logistic regression).
        Args:
            w: numpy array of weights (also called Theta).
            b: Bias (int).
            x: Image data in the following shape: (pixels * pixels * 3, number of images).
        Return:
            numpy array of predictions (y-hat).
        """
        w = w.reshape(x.shape[0], 1)
        activation = self.sigmoid(np.dot(w.T, x) + b)
        activation[activation > 0.5] = 1
        activation[activation <= 0.5] = 0
        return activation

    def predict_nn(self, x, parameters):
        """
        Predict labels using a neural network.
        Args:
            x: Image data in the following shape: (pixels * pixels * 3, number of images).
            parameters: A dictionary of parameters of the trained model.
        Return:
            numpy array of predictions (y-hat).
        """
        last_activation, _ = self.forward_prop(x, parameters)
        last_activation[last_activation > 0.5] = 1
        last_activation[last_activation <= 0.5] = 0
        return last_activation

    def plot_figure(self, figure_type, frames=None, costs=None):
        """
        Plot samples or the learning curve.
        Args:
            figure_type: String indicating the figure to plot.
            frames: pandas DataFrame with the image data.
            costs: A list of costs for plotting the learning curve.
        Return:
            None
        """
        rows, columns = self.sample_dimensions
        sample_size = rows * columns
        if figure_type == 'lc':
            plt.figure(figsize=self.fig_size)
            plt.title('Learning curve')
            plt.plot(range(self.max_iter), costs)
            plt.xlabel('Iterations')
            plt.ylabel('Cost')
            if self.save_figs:
                plt.savefig('Learning curve' + '.png')
        if figure_type == 'r':
            to_display = frames[frames['Accuracy'] == 1][
                ['Images', 'Predictions']
            ].head(sample_size)
            images = np.array(list(to_display['Images']))
            predictions = np.array(list(to_display['Predictions']))
            title = f'Classification of {self.target_label} results sample'
            self.display_sample_images(title, images, predictions)
        if figure_type == 'e':
            to_display = frames[frames['Accuracy'] == 0][
                ['Images', 'Predictions']
            ].head(sample_size)
            images = np.array(list(to_display['Images']))
            predictions = np.array(list(to_display['Predictions']))
            title = 'Sample of the misclassified images'
            self.display_sample_images(title, images, predictions)

    def save_run_details(
        self, path, alg, accuracy, total_time, image_size, x_train, x_test, frames
    ):
        """
        Save run details to a .txt file.
        Args:
            path: Path to save the .txt file.
            alg: Classification algorithm.
            accuracy: Train and test accuracy.
            total_time: Total time taken in seconds.
            image_size: Image side length in pixels.
            x_train: Training set.
            x_test: Test set.
            frames: pandas DataFrame with the image data and results.
        Return:
            None
        """
        with open(path + '/' + 'run_details.txt', 'w') as details:
            details.write(f'Algorithm: {alg}\n')
            details.write(f'Iterations: {self.max_iter} iterations.\n')
            details.write(f'Training accuracy: {accuracy[0]}%\n')
            details.write(f'Test accuracy: {accuracy[1]}%\n')
            details.write(f'Image size: {image_size} x {image_size}\n')
            details.write(f'Learning rate: {self.learning_rate}\n')
            details.write(f'Test sample: {self.test_size}\n')
            details.write(f'Layer dimensions: {self.flat_dimensions}\n')
            # x_train and x_test have shape (features, examples), so the
            # example counts are in the second dimension.
            details.write(f'Data set size: {x_train.shape[1] + x_test.shape[1]}\n')
            details.write(f'Training set size: {x_train.shape[1]}\n')
            details.write(f'Test set size: {x_test.shape[1]}\n')
            details.write(
                f'Number of {self.target_label} examples: '
                f'{len(frames[frames["Classification"] == 1])}\n'
            )
            details.write(
                f'Number of non-{self.target_label} examples: '
                f'{len(frames[frames["Classification"] == 0])}\n'
            )
            details.write(f'Total time: {total_time} seconds.')

    def predict_folder(self, alg, sub_folders=None):
        """
        Classify the target label among the specified folders.
        Args:
            alg: Algorithm to use for classification:
                'lr': Logistic regression.
                'nn': Neural network.
            sub_folders: Sub-folders to load and process.
        Return:
            pandas DataFrame with the results.
        """
        start_time = perf_counter()
        if self.new_hdf:
            self.clear_hdf(sub_folders)
            self.folders_to_hdf(sub_folders)
        x_train, y_train, x_test, y_test, frames = self.pre_process(sub_folders)
        train_predictions, test_predictions, all_predictions, costs = (
            None,
            None,
            None,
            None,
        )
        if alg == 'lr':
            w, b = np.zeros((len(x_train), 1)), 0
            w, b, dw, db, costs = self.g_descent(w, b, x_train, y_train)
            train_predictions = self.predict(w, b, x_train)
            test_predictions = self.predict(w, b, x_test)
            # frames holds the test examples first (see pre_process), so the
            # test predictions must come first to stay row-aligned.
            all_predictions = np.append(test_predictions, train_predictions)
        elif alg == 'nn':
            initial_parameters = self.initialize_parameters()
            parameters, costs = self.g_descent_nn(x_train, y_train, initial_parameters)
            train_predictions = self.predict_nn(x_train, parameters)
            test_predictions = self.predict_nn(x_test, parameters)
            all_predictions = np.append(test_predictions, train_predictions)
        training_accuracy = 100 - np.mean(np.abs(train_predictions - y_train)) * 100
        test_accuracy = 100 - np.mean(np.abs(test_predictions - y_test)) * 100
        frames['Predictions'] = all_predictions
        frames['Accuracy'] = 0
        frames.loc[frames['Predictions'] == frames['Classification'], 'Accuracy'] = 1
        self.display_message(f'Training accuracy: {training_accuracy}%')
        self.display_message(f'Test accuracy: {test_accuracy}%')
        self.display_message(f'Train predictions: \n{train_predictions}')
        self.display_message(f'Train actual: \n{y_train}')
        self.display_message(f'Test predictions: \n{test_predictions}')
        self.display_message(f'Test actual: \n{y_test}')
        if self.display_figs and 'lc' in self.display_figs:
            self.plot_figure('lc', costs=costs)
        if self.display_figs and 'r' in self.display_figs:
            self.plot_figure('r', frames)
        if self.display_figs and 'e' in self.display_figs:
            self.plot_figure('e', frames)
        if self.show_figs:
            plt.show()
        end_time = perf_counter()
        total_time = end_time - start_time
        self.display_message(f'Time: {total_time} seconds.')
        if self.log_file or self.save_figs:
            new_folder_name = (
                f'{alg}-{self.max_iter}-{self.learning_rate}-'
                f'{random.randint(10 ** 6, 10 ** 7)}'
            )
            os.mkdir(new_folder_name)
            # The flattened input dimension is pixels * pixels * 3 (RGB),
            # so the side length is the square root of a third of it.
            img_size = int(np.sqrt(self.flat_dimensions[0] / 3))
            self.save_run_details(
                './' + new_folder_name,
                alg,
                (training_accuracy, test_accuracy),
                total_time,
                img_size,
                x_train,
                x_test,
                frames,
            )
            for file_name in os.listdir('.'):
                if file_name.endswith('.png') or file_name.endswith('.txt'):
                    shutil.move(file_name, new_folder_name)
        return frames


if __name__ == '__main__':
    clf = ImageClassifier(
        'test_photos/',
        'Dog',
        display_figs=['i', 'lc', 'r', 'e'],
        max_iter=7000,
        learning_rate=0.0015,
        show_figs=True,
        save_figs=True,
        log_file='log.txt',
        random_seed=151,
    )
    clf.predict_folder('nn')
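
For reference on the alg parameter, a sketch of invoking the other paths (the sub-folder names below are illustrative):

clf.predict_folder('lr')  # logistic regression instead of the neural network
clf.predict_folder('nn', sub_folders=['Dog', 'Cat'])  # restrict to the listed sub-folders

Note that pre_process appends to self.flat_dimensions on every run, so repeated calls on one instance will grow that list; a fresh instance per run avoids this.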