Commit 4467423

Fix grammar and spelling mistakes in sequential_minimum_optimization.py (TheAlgorithms#11427)
1 parent 41a1cdf commit 4467423

File tree

1 file changed: +66 -69 lines changed

machine_learning/sequential_minimum_optimization.py

Lines changed: 66 additions & 69 deletions
@@ -1,11 +1,9 @@
 """
-Implementation of sequential minimal optimization (SMO) for support vector machines
-(SVM).
+Sequential minimal optimization (SMO) for support vector machines (SVM)

-Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
-programming (QP) problem that arises during the training of support vector
-machines.
-It was invented by John Platt in 1998.
+Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
+programming (QP) problem that arises during the training of SVMs. It was invented by
+John Platt in 1998.

 Input:
     0: type: numpy.ndarray.
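
Aside (not part of the commit): the QP problem named in the docstring is the standard SVM dual. In the usual notation it reads

    \max_{\alpha}\ \sum_{i=1}^{n} \alpha_i
        - \frac{1}{2} \sum_{i=1}^{n} \sum_{j=1}^{n} \alpha_i \alpha_j \, y_i y_j \, K(x_i, x_j)
    \quad \text{subject to} \quad 0 \le \alpha_i \le C, \qquad \sum_{i=1}^{n} \alpha_i y_i = 0.

SMO attacks this by repeatedly picking a pair of multipliers and optimizing them analytically while all the others stay fixed, which is what the _choose_alphas and _get_new_alpha hunks below operate on.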
@@ -124,8 +122,7 @@ def fit(self):
             b_old = self._b
             self._b = b

-            # 4: update error value,here we only calculate those non-bound samples'
-            # error
+            # 4: update error, here we only calculate the error for non-bound samples
             self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
             for s in self.unbound:
                 if s in (i1, i2):
@@ -136,7 +133,7 @@ def fit(self):
                     + (self._b - b_old)
                 )

-            # if i1 or i2 is non-bound,update there error value to zero
+            # if i1 or i2 is non-bound,update their error value to zero
             if self._is_unbound(i1):
                 self._error[i1] = 0
             if self._is_unbound(i2):
@@ -161,7 +158,7 @@ def predict(self, test_samples, classify=True):
             results.append(result)
         return np.array(results)

-    # Check if alpha violate KKT condition
+    # Check if alpha violates the KKT condition
     def _check_obey_kkt(self, index):
         alphas = self.alphas
         tol = self._tol
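
Aside on the check renamed above (not part of the commit): the KKT conditions for the dual are alpha_i = 0 => y_i*g(x_i) >= 1, 0 < alpha_i < C => y_i*g(x_i) = 1, and alpha_i = C => y_i*g(x_i) <= 1. A minimal standalone sketch of such a violation test; the names (violates_kkt, y_g) are hypothetical, not attributes of this class:

    def violates_kkt(alpha: float, y_g: float, c: float, tol: float) -> bool:
        # y_g is y_i * g(x_i); r measures how far the sample is from the margin
        r = y_g - 1.0
        # alpha can still grow although the sample violates the margin (r < -tol),
        # or alpha can still shrink although the sample is beyond it (r > tol)
        return (alpha < c and r < -tol) or (alpha > 0.0 and r > tol)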
@@ -172,20 +169,19 @@ def _check_obey_kkt(self, index):

     # Get value calculated from kernel function
     def _k(self, i1, i2):
-        # for test samples,use Kernel function
+        # for test samples,use kernel function
         if isinstance(i2, np.ndarray):
             return self.Kernel(self.samples[i1], i2)
-        # for train samples,Kernel values have been saved in matrix
+        # for training samples, kernel values have been saved in matrix
         else:
             return self._K_matrix[i1, i2]

-    # Get sample's error
+    # Get error for sample
     def _e(self, index):
         """
         Two cases:
-        1:Sample[index] is non-bound,Fetch error from list: _error
-        2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
-
+        1: Sample[index] is non-bound, fetch error from list: _error
+        2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
         """
         # get from error data
         if self._is_unbound(index):
@@ -196,7 +192,7 @@ def _e(self, index):
         yi = self.tags[index]
         return gx - yi

-    # Calculate Kernel matrix of all possible i1,i2 ,saving time
+    # Calculate kernel matrix of all possible i1, i2, saving time
     def _calculate_k_matrix(self):
         k_matrix = np.zeros([self.length, self.length])
         for i in self._all_samples:
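
Aside on why the comment above says "saving time" (not part of the commit): precomputing the Gram matrix avoids re-evaluating the kernel for every (i1, i2) pair during training, at the cost of O(n^2) memory. A minimal sketch under hypothetical names (gram_matrix, samples, kernel), exploiting kernel symmetry:

    import numpy as np

    def gram_matrix(samples: np.ndarray, kernel) -> np.ndarray:
        n = len(samples)
        k = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                # K(x_i, x_j) == K(x_j, x_i), so fill both entries at once
                k[i, j] = k[j, i] = kernel(samples[i], samples[j])
        return k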
@@ -206,7 +202,7 @@ def _calculate_k_matrix(self):
             )
         return k_matrix

-    # Predict test sample's tag
+    # Predict tag for test sample
     def _predict(self, sample):
         k = self._k
         predicted_value = (
@@ -222,30 +218,31 @@ def _predict(self, sample):

     # Choose alpha1 and alpha2
     def _choose_alphas(self):
-        locis = yield from self._choose_a1()
-        if not locis:
+        loci = yield from self._choose_a1()
+        if not loci:
             return None
-        return locis
+        return loci

     def _choose_a1(self):
         """
-        Choose first alpha ;steps:
-           1:First loop over all sample
-           2:Second loop over all non-bound samples till all non-bound samples does not
-           voilate kkt condition.
-           3:Repeat this two process endlessly,till all samples does not voilate kkt
-           condition samples after first loop.
+        Choose first alpha
+        Steps:
+           1: First loop over all samples
+           2: Second loop over all non-bound samples until no non-bound samples violate
+              the KKT condition.
+           3: Repeat these two processes until no samples violate the KKT condition
+              after the first loop.
         """
         while True:
             all_not_obey = True
             # all sample
-            print("scanning all sample!")
+            print("Scanning all samples!")
             for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
                 all_not_obey = False
                 yield from self._choose_a2(i1)

             # non-bound sample
-            print("scanning non-bound sample!")
+            print("Scanning non-bound samples!")
             while True:
                 not_obey = True
                 for i1 in [
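
Aside on the yield from pattern in _choose_alphas above (not part of the commit): the method is a generator that delegates to _choose_a1, re-yielding every candidate pair it produces and capturing its return value. A minimal self-contained sketch of the same delegation, with hypothetical names:

    def inner():
        yield (1, 2)  # a candidate pair
        yield (3, 4)
        return "exhausted"  # becomes the value of the `yield from` expression

    def outer():
        result = yield from inner()  # re-yields each pair, then captures the return
        print(result)

    for pair in outer():
        print(pair)  # prints (1, 2), then (3, 4), then "exhausted"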
@@ -256,20 +253,21 @@ def _choose_a1(self):
                     not_obey = False
                     yield from self._choose_a2(i1)
                 if not_obey:
-                    print("all non-bound samples fit the KKT condition!")
+                    print("All non-bound samples satisfy the KKT condition!")
                     break
             if all_not_obey:
-                print("all samples fit the KKT condition! Optimization done!")
+                print("All samples satisfy the KKT condition!")
                 break
         return False

     def _choose_a2(self, i1):
         """
-        Choose the second alpha by using heuristic algorithm ;steps:
-           1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
-           2: Start in a random point,loop over all non-bound samples till alpha1 and
+        Choose the second alpha using a heuristic algorithm
+        Steps:
+           1: Choose alpha2 that maximizes the step size (|E1 - E2|).
+           2: Start in a random point, loop over all non-bound samples till alpha1 and
               alpha2 are optimized.
-           3: Start in a random point,loop over all samples till alpha1 and alpha2 are
+           3: Start in a random point,loop over all samples till alpha1 and alpha2 are
              optimized.
         """
         self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
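
For context on step 1 of the heuristic just reworded (not part of the commit): choosing the partner with the largest |E1 - E2| tends to give the biggest analytic step for alpha2. A minimal sketch with hypothetical names (errors is an error cache, candidates the non-bound indices):

    def pick_second_alpha(e1: float, errors: list, candidates: list) -> int:
        # maximize the step size |E1 - E2| over the candidate indices
        return max(candidates, key=lambda i2: abs(e1 - errors[i2]))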
@@ -306,7 +304,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         if i1 == i2:
             return None, None

-        # calculate L and H which bound the new alpha2
+        # calculate L and H which bound the new alpha2
         s = y1 * y2
         if s == -1:
             l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1)  # noqa: E741
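
For context on the L and H comment above (not part of the commit): the box constraint 0 <= alpha <= C together with the equality constraint alpha1*y1 + alpha2*y2 = const confines the new alpha2 to a segment [L, H], whose endpoints depend on whether the labels agree. A standalone sketch with hypothetical names:

    def alpha2_bounds(a1: float, a2: float, y1: int, y2: int, c: float):
        if y1 != y2:
            # constraint line: a2 - a1 = const
            return max(0.0, a2 - a1), min(c, c + a2 - a1)
        # constraint line: a2 + a1 = const
        return max(0.0, a2 + a1 - c), min(c, a2 + a1)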
@@ -320,7 +318,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         k22 = k(i2, i2)
         k12 = k(i1, i2)

-        # select the new alpha2 which could get the minimal objectives
+        # select the new alpha2 which could achieve the minimal objectives
         if (eta := k11 + k22 - 2.0 * k12) > 0.0:
             a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
             # a2_new has a boundary
@@ -335,7 +333,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
             l1 = a1 + s * (a2 - l)
             h1 = a1 + s * (a2 - h)

-            # way 1
+            # Method 1
             f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
             f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
             ol = (
@@ -353,9 +351,8 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
                 + s * h * h1 * k(i1, i2)
             )
             """
-            # way 2
-            Use objective function check which alpha2 new could get the minimal
-            objectives
+            Method 2: Use objective function to check which alpha2_new could achieve the
+            minimal objectives
             """
             if ol < (oh - self._eps):
                 a2_new = l
@@ -375,7 +372,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):

         return a1_new, a2_new

-    # Normalise data using min_max way
+    # Normalize data using min-max method
     def _norm(self, data):
         if self._init:
             self._min = np.min(data, axis=0)
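
Aside on min-max normalization (not part of the commit): each feature is rescaled into [0, 1] using the minimum and maximum observed on the training data, and those same statistics must be reused for test samples. A minimal sketch with hypothetical names:

    import numpy as np

    def min_max_norm(data: np.ndarray, data_min: np.ndarray, data_max: np.ndarray) -> np.ndarray:
        # data_min and data_max are per-feature statistics from the training set
        return (data - data_min) / (data_max - data_min)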
@@ -424,7 +421,7 @@ def _rbf(self, v1, v2):

     def _check(self):
         if self._kernel == self._rbf and self.gamma < 0:
-            raise ValueError("gamma value must greater than 0")
+            raise ValueError("gamma value must be non-negative")

     def _get_kernel(self, kernel_name):
         maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
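
For context on the gamma check above (not part of the commit): the RBF kernel is K(x, z) = exp(-gamma * ||x - z||^2), which is a valid kernel only for non-negative gamma, matching the corrected error message. A minimal standalone sketch:

    import numpy as np

    def rbf_kernel(v1: np.ndarray, v2: np.ndarray, gamma: float) -> float:
        # K(x, z) = exp(-gamma * ||x - z||^2), with gamma >= 0
        return float(np.exp(-gamma * np.sum((v1 - v2) ** 2)))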
@@ -444,27 +441,27 @@ def call_func(*args, **kwargs):
         start_time = time.time()
         func(*args, **kwargs)
         end_time = time.time()
-        print(f"smo algorithm cost {end_time - start_time} seconds")
+        print(f"SMO algorithm cost {end_time - start_time} seconds")

     return call_func


 @count_time
-def test_cancel_data():
-    print("Hello!\nStart test svm by smo algorithm!")
+def test_cancer_data():
+    print("Hello!\nStart test SVM using the SMO algorithm!")
     # 0: download dataset and load into pandas' dataframe
-    if not os.path.exists(r"cancel_data.csv"):
+    if not os.path.exists(r"cancer_data.csv"):
         request = urllib.request.Request(  # noqa: S310
             CANCER_DATASET_URL,
             headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
         )
         response = urllib.request.urlopen(request)  # noqa: S310
         content = response.read().decode("utf-8")
-        with open(r"cancel_data.csv", "w") as f:
+        with open(r"cancer_data.csv", "w") as f:
             f.write(content)

     data = pd.read_csv(
-        "cancel_data.csv",
+        "cancer_data.csv",
         header=None,
         dtype={0: str},  # Assuming the first column contains string data
     )
@@ -479,14 +476,14 @@ def test_cancer_data():
     train_data, test_data = samples[:328, :], samples[328:, :]
     test_tags, test_samples = test_data[:, 0], test_data[:, 1:]

-    # 3: choose kernel function,and set initial alphas to zero(optional)
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    # 3: choose kernel function,and set initial alphas to zero(optional)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     al = np.zeros(train_data.shape[0])

     # 4: calculating best alphas using SMO algorithm and predict test_data samples
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         alpha_list=al,
         cost=0.4,
         b=0.0,
@@ -501,30 +498,30 @@ def test_cancer_data():
     for i in range(test_tags.shape[0]):
         if test_tags[i] == predict[i]:
             score += 1
-    print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
+    print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
     print(f"Rough Accuracy: {score / test_tags.shape[0]}")


 def test_demonstration():
     # change stdout
-    print("\nStart plot,please wait!!!")
+    print("\nStarting plot,please wait!")
     sys.stdout = open(os.devnull, "w")

     ax1 = plt.subplot2grid((2, 2), (0, 0))
     ax2 = plt.subplot2grid((2, 2), (0, 1))
     ax3 = plt.subplot2grid((2, 2), (1, 0))
     ax4 = plt.subplot2grid((2, 2), (1, 1))
-    ax1.set_title("linear svm,cost:0.1")
+    ax1.set_title("Linear SVM, cost = 0.1")
     test_linear_kernel(ax1, cost=0.1)
-    ax2.set_title("linear svm,cost:500")
+    ax2.set_title("Linear SVM, cost = 500")
     test_linear_kernel(ax2, cost=500)
-    ax3.set_title("rbf kernel svm,cost:0.1")
+    ax3.set_title("RBF kernel SVM, cost = 0.1")
     test_rbf_kernel(ax3, cost=0.1)
-    ax4.set_title("rbf kernel svm,cost:500")
+    ax4.set_title("RBF kernel SVM, cost = 500")
     test_rbf_kernel(ax4, cost=500)

     sys.stdout = sys.__stdout__
-    print("Plot done!!!")
+    print("Plot done!")


 def test_linear_kernel(ax, cost):
@@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -571,11 +568,11 @@ def plot_partition_boundary(
     model, train_data, ax, resolution=100, colors=("b", "k", "r")
 ):
     """
-    We can not get the optimum w of our kernel svm model which is different from linear
-    svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our trained model. Then we
-    could use this prediced values to draw contour map.
-    And this contour map can represent svm's partition boundary.
+    We cannot get the optimal w of our kernel SVM model, which is different from a
+    linear SVM. For this reason, we generate randomly distributed points with high
+    density, and predicted values of these points are calculated using our trained
+    model. Then we could use this predicted values to draw contour map, and this contour
+    map represents the SVM's partition boundary.
     """
     train_data_x = train_data[:, 1]
     train_data_y = train_data[:, 2]
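
Aside on the approach the rewritten docstring describes (not part of the commit): evaluate the decision function on a dense grid and draw its level sets; level 0 is the separating boundary and levels -1/+1 are the margins. A hypothetical sketch, assuming model.predict(..., classify=False) returns raw decision values as the predict signature earlier in the diff suggests:

    import numpy as np

    def draw_boundary(model, ax, x_min, x_max, y_min, y_max,
                      resolution=100, colors=("b", "k", "r")):
        # dense grid over the plotting area
        xx, yy = np.meshgrid(
            np.linspace(x_min, x_max, resolution),
            np.linspace(y_min, y_max, resolution),
        )
        grid = np.c_[xx.ravel(), yy.ravel()]
        zz = model.predict(grid, classify=False).reshape(xx.shape)
        # contour at the decision levels: margins at -1/+1, boundary at 0
        ax.contour(xx, yy, zz, levels=[-1, 0, 1], colors=colors)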
@@ -620,6 +617,6 @@


 if __name__ == "__main__":
-    test_cancel_data()
+    test_cancer_data()
     test_demonstration()
     plt.show()
