import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs, make_circles, make_moons
from sklearn.preprocessing import StandardScaler

# Set tolerances
tol = 0.01  # error tolerance
eps = 0.01  # alpha tolerance


class SMOModel:
    """Container object for the model used for sequential minimal optimization."""

    def __init__(self, X, y, C, kernel, alphas, b, errors):
        self.X = X               # training data matrix
        self.y = y               # class label vector
        self.C = C               # regularization parameter
        self.kernel = kernel     # kernel function
        self.alphas = alphas     # Lagrange multiplier vector
        self.b = b               # scalar bias term
        self.errors = errors     # error cache
        self._obj = []           # record of objective function values
        self.m = len(self.X)     # size of the training set


def linear(x, y, b=1):
    """
    Computes the linear kernel between x and y

    Args:
        x: array
        y: array
        b: bias (a scalar)

    Returns:
        Linear kernel between x and y
    """

    result = None

    #######################################################################
    # TODO:                                                               #
    # Compute the linear kernel between x and y                           #
    #######################################################################
    result = np.dot(x, y.T) + b
    #######################################################################
    # END OF YOUR CODE                                                    #
    #######################################################################

    return result


def gaussian(x, y, sigma=1):
    """
    Computes the Gaussian (RBF) kernel between x and y

    Args:
        x: array
        y: array
        sigma: scalar bandwidth

    Returns:
        Gaussian similarity
    """

    result = None

    #######################################################################
    # TODO:                                                               #
    # Compute the Gaussian kernel between x and y                         #
    #######################################################################
    # K(x, y) = exp(-||x - y||^2 / (2 * sigma^2)), handling vector-vector,
    # matrix-vector, and matrix-matrix inputs via broadcasting.
    if np.ndim(x) == 1 and np.ndim(y) == 1:
        result = np.exp(-np.linalg.norm(x - y) ** 2 / (2 * sigma ** 2))
    elif (np.ndim(x) > 1 and np.ndim(y) == 1) or (np.ndim(x) == 1 and np.ndim(y) > 1):
        result = np.exp(-np.linalg.norm(x - y, axis=1) ** 2 / (2 * sigma ** 2))
    elif np.ndim(x) > 1 and np.ndim(y) > 1:
        result = np.exp(-np.linalg.norm(x[:, np.newaxis] - y[np.newaxis, :], axis=2) ** 2 / (2 * sigma ** 2))
    #######################################################################
    # END OF YOUR CODE                                                    #
    #######################################################################

    return result

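
# A quick sanity check of the kernel's broadcasting behavior (illustrative
# only; the points and the default sigma=1 are arbitrary choices, not part
# of the assignment). ||[0, 0] - [1, 1]||^2 = 2, so the scalar case should
# return exp(-2 / 2) = exp(-1).
assert np.isclose(gaussian(np.array([0., 0.]), np.array([1., 1.])), np.exp(-1))
assert gaussian(np.zeros((3, 2)), np.array([1., 1.])).shape == (3,)   # vector of similarities
assert gaussian(np.zeros((3, 2)), np.ones((4, 2))).shape == (3, 4)    # full Gram matrix
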

def objective_function(alphas, y, kernel, X):
    """
    Computes the dual objective function

        W(alpha) = sum_i alpha_i
                   - 1/2 * sum_i sum_j y_i * y_j * alpha_i * alpha_j * K(x_i, x_j)

    Args:
        alphas: Lagrange multipliers
        y: class labels, -1 or 1
        kernel: kernel function
        X: training data

    Returns:
        Value of the objective function
    """

    result = None

    #######################################################################
    # TODO:                                                               #
    # Compute the objective function                                      #
    #######################################################################
    # Outer products give the full double sum over labels and multipliers.
    result = np.sum(alphas) - 0.5 * np.sum((y[:, None] * y[None, :]) *
                                           kernel(X, X) *
                                           (alphas[:, None] * alphas[None, :]))
    #######################################################################
    # END OF YOUR CODE                                                    #
    #######################################################################

    return result


# Decision function

def decision_function(alphas, target, kernel, X_train, x_test, b):
    """
    Computes the SVM decision function applied to x_test

    Args:
        alphas: Lagrange multipliers
        target: class labels, -1 or 1
        kernel: kernel function
        X_train: training data
        x_test: test point(s)
        b: scalar bias term

    Returns:
        Output of the decision function
    """

    result = None

    #######################################################################
    # TODO:                                                               #
    # Compute the decision function                                       #
    #######################################################################
    result = np.dot(alphas * target, kernel(X_train, x_test)) - b
    #######################################################################
    # END OF YOUR CODE                                                    #
    #######################################################################

    return result


def plot_decision_boundary(model, ax, resolution=100, colors=('b', 'k', 'r')):
    """Plots the model's decision boundary on the input axes object.
    Range of decision boundary grid is determined by the training data.
    Returns decision boundary grid and axes object (`grid`, `ax`)."""

    # Generate coordinate grid of shape [resolution x resolution]
    # and evaluate the model over the entire space
    xrange = np.linspace(model.X[:, 0].min(), model.X[:, 0].max(), resolution)
    yrange = np.linspace(model.X[:, 1].min(), model.X[:, 1].max(), resolution)
    # Outer loop over yrange so that grid[i, j] corresponds to (xrange[j], yrange[i]),
    # the orientation ax.contour expects.
    grid = [[decision_function(model.alphas, model.y,
                               model.kernel, model.X,
                               np.array([xr, yr]), model.b) for xr in xrange] for yr in yrange]
    grid = np.array(grid).reshape(len(yrange), len(xrange))

    # Plot decision contours using grid and
    # make a scatter plot of training data
    ax.contour(xrange, yrange, grid, (-1, 0, 1), linewidths=(1, 1, 1),
               linestyles=('--', '-', '--'), colors=colors)
    ax.scatter(model.X[:, 0], model.X[:, 1],
               c=model.y, cmap=plt.cm.viridis, lw=0, alpha=0.5)

    # Plot support vectors (non-zero alphas)
    # as circled points (linewidth > 0)
    mask = model.alphas != 0.0
    ax.scatter(model.X[:, 0][mask], model.X[:, 1][mask],
               c=model.y[mask], cmap=plt.cm.viridis)

    return grid, ax

def take_step(i1, i2, model):

    # Skip if chosen alphas are the same
    if i1 == i2:
        return 0, model

    alph1 = model.alphas[i1]
    alph2 = model.alphas[i2]
    y1 = model.y[i1]
    y2 = model.y[i2]
    E1 = model.errors[i1]
    E2 = model.errors[i2]
    s = y1 * y2

    # Compute L & H, the bounds on new possible alpha values
    if (y1 != y2):
        L = max(0, alph2 - alph1)
        H = min(model.C, model.C + alph2 - alph1)
    elif (y1 == y2):
        L = max(0, alph1 + alph2 - model.C)
        H = min(model.C, alph1 + alph2)
    if (L == H):
        return 0, model

    # Compute kernel & 2nd derivative eta
    k11 = model.kernel(model.X[i1], model.X[i1])
    k12 = model.kernel(model.X[i1], model.X[i2])
    k22 = model.kernel(model.X[i2], model.X[i2])
    eta = 2 * k12 - k11 - k22

    # Compute new alpha 2 (a2) if eta is negative
    if (eta < 0):
        a2 = alph2 - y2 * (E1 - E2) / eta
        # Clip a2 based on bounds L & H

        #######################################################################
        # TODO:                                                               #
        # Clip a2 based on the last equation in the notes                     #
        #######################################################################
        if a2 < L:
            a2 = L
        elif a2 > H:
            a2 = H
        #######################################################################
        # END OF YOUR CODE                                                    #
        #######################################################################

    # If eta is non-negative, move new a2 to bound with greater objective function value
    else:
        alphas_adj = model.alphas.copy()
        alphas_adj[i2] = L
        # objective function output with a2 = L
        Lobj = objective_function(alphas_adj, model.y, model.kernel, model.X)
        alphas_adj[i2] = H
        # objective function output with a2 = H
        Hobj = objective_function(alphas_adj, model.y, model.kernel, model.X)
        if Lobj > (Hobj + eps):
            a2 = L
        elif Lobj < (Hobj - eps):
            a2 = H
        else:
            a2 = alph2

    # Push a2 to 0 or C if very close
    if a2 < 1e-8:
        a2 = 0.0
    elif a2 > (model.C - 1e-8):
        a2 = model.C

    # If examples can't be optimized within epsilon (eps), skip this pair
    if (np.abs(a2 - alph2) < eps * (a2 + alph2 + eps)):
        return 0, model

    # Calculate new alpha 1 (a1)
    a1 = alph1 + s * (alph2 - a2)

    # Update threshold b to reflect newly calculated alphas
    # Calculate both possible thresholds
    b1 = E1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12 + model.b
    b2 = E2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22 + model.b

    # Set new threshold based on if a1 or a2 is bound by L and/or H
    if 0 < a1 < model.C:
        b_new = b1
    elif 0 < a2 < model.C:
        b_new = b2
    # Average thresholds if both are bound
    else:
        b_new = (b1 + b2) * 0.5

    # Update model object with new alphas & threshold
    model.alphas[i1] = a1
    model.alphas[i2] = a2

    # Update error cache
    # Error cache for optimized alphas is set to 0 if they're unbound
    for index, alph in zip([i1, i2], [a1, a2]):
        if 0.0 < alph < model.C:
            model.errors[index] = 0.0

    # Set non-optimized errors based on equation 12.11 in Platt's book
    non_opt = [n for n in range(model.m) if (n != i1 and n != i2)]
    model.errors[non_opt] = model.errors[non_opt] + \
        y1 * (a1 - alph1) * model.kernel(model.X[i1], model.X[non_opt]) + \
        y2 * (a2 - alph2) * model.kernel(model.X[i2], model.X[non_opt]) + model.b - b_new

    # Update model threshold
    model.b = b_new

    return 1, model

def examine_example(i2, model):

    y2 = model.y[i2]
    alph2 = model.alphas[i2]
    E2 = model.errors[i2]
    r2 = E2 * y2

    # Proceed if the KKT conditions are violated beyond the specified tolerance (tol)
    if ((r2 < -tol and alph2 < model.C) or (r2 > tol and alph2 > 0)):

        if len(model.alphas[(model.alphas != 0) & (model.alphas != model.C)]) > 1:
            # Use 2nd choice heuristic: choose i1 to maximize the difference in error |E1 - E2|
            if model.errors[i2] > 0:
                i1 = np.argmin(model.errors)
            elif model.errors[i2] <= 0:
                i1 = np.argmax(model.errors)
            step_result, model = take_step(i1, i2, model)
            if step_result:
                return 1, model

        # Loop through non-zero and non-C alphas, starting at a random point
        for i1 in np.roll(np.where((model.alphas != 0) & (model.alphas != model.C))[0],
                          np.random.choice(np.arange(model.m))):
            step_result, model = take_step(i1, i2, model)
            if step_result:
                return 1, model

        # Loop through all alphas, starting at a random point
        for i1 in np.roll(np.arange(model.m), np.random.choice(np.arange(model.m))):
            step_result, model = take_step(i1, i2, model)
            if step_result:
                return 1, model

    return 0, model

def train(model):

    numChanged = 0
    examineAll = 1

    while (numChanged > 0) or examineAll:
        numChanged = 0
        if examineAll:
            # loop over all training examples
            for i in range(model.alphas.shape[0]):
                examine_result, model = examine_example(i, model)
                numChanged += examine_result
                if examine_result:
                    obj_result = objective_function(model.alphas, model.y, model.kernel, model.X)
                    model._obj.append(obj_result)
        else:
            # loop over examples where alphas are not already at their limits
            for i in np.where((model.alphas != 0) & (model.alphas != model.C))[0]:
                examine_result, model = examine_example(i, model)
                numChanged += examine_result
                if examine_result:
                    obj_result = objective_function(model.alphas, model.y, model.kernel, model.X)
                    model._obj.append(obj_result)
        if examineAll == 1:
            examineAll = 0
        elif numChanged == 0:
            examineAll = 1

    return model

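
# ---------------------------------------------------------------------------
# Example usage: a minimal sketch, not part of the assignment template. It
# trains the linear-kernel SVM on a toy two-blob data set and plots the
# resulting boundary. The data set, C value, and error-cache initialization
# (E_i = f(x_i) - y_i, which with alphas = 0 and b = 0 is simply -y) are
# illustrative assumptions, not requirements of the code above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    np.random.seed(0)

    # Generate and standardize a simple two-class data set
    X_train, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=1)
    X_train = StandardScaler().fit_transform(X_train)
    y[y == 0] = -1  # relabel classes as -1 / +1

    # Initialize the model: all multipliers and the bias start at zero
    C = 1000.0
    m = len(X_train)
    model = SMOModel(X_train, y, C, linear, np.zeros(m), 0.0, np.zeros(m))

    # Initialize the error cache: E_i = f(x_i) - y_i
    model.errors = decision_function(model.alphas, model.y, model.kernel,
                                     model.X, model.X, model.b) - model.y

    # Run SMO and plot the resulting decision boundary
    trained = train(model)
    fig, ax = plt.subplots()
    plot_decision_boundary(trained, ax)
    plt.show()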