Commit c60b768

authored

Merge pull request #84 from DDSSS07/PSO_FS

Feature Selection using PSO ( Particle Swarm Optimization )

2 parents d6fb189 + 340d9c7, commit c60b768

File tree

2 files changed

+860

-0

lines changed

Algorithms/Searching
- PSO Feature Selection.py
- breast_cancer_data.csv

2 files changed

+860

-0

lines changed

`Algorithms/Searching/PSO Feature Selection.py`

Lines changed: 290 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,290 @@`
	`1`	`+import numpy as np`
	`2`	`+import pandas as pd`
	`3`	`+import seaborn as sns`
	`4`	`+from random import random`
	`5`	`+from sklearn import metrics`
	`6`	`+from sklearn.preprocessing import LabelEncoder`
	`7`	`+from sklearn.model_selection import train_test_split`
	`8`	`+from sklearn.model_selection import cross_validate`
	`9`	`+from sklearn.linear_model import LogisticRegression`
	`10`	`+from sklearn.metrics import confusion_matrix, make_scorer`
	`11`	`+from sklearn.metrics import roc_auc_score, accuracy_score`
	`12`	`+from sklearn.metrics import precision_score, recall_score`
	`13`	`+`
	`14`	`+import warnings`
	`15`	`+warnings.filterwarnings('ignore')`
	`16`	`+`
	`17`	`+def classification_accuracy(y_actual, y_hat):`
	`18`	`+ TP = 0`
	`19`	`+ FP = 0`
	`20`	`+ TN = 0`
	`21`	`+ FN = 0`
	`22`	`+`
	`23`	`+ for i in range(len(y_hat)):`
	`24`	`+ if y_actual[i]==y_hat[i]==1:`
	`25`	`+ TP += 1`
	`26`	`+ if y_hat[i]==1 and y_actual[i]!=y_hat[i]:`
	`27`	`+ FP += 1`
	`28`	`+ if y_actual[i]==y_hat[i]==0:`
	`29`	`+ TN += 1`
	`30`	`+ if y_hat[i]==0 and y_actual[i]!=y_hat[i]:`
	`31`	`+ FN += 1`
	`32`	`+`
	`33`	`+ class_acc = float((TP+TN)) / float((TP+FP+TN+FN))`
	`34`	`+`
	`35`	`+ if TP == 0 and FN == 0 :`
	`36`	`+ recall = 0`
	`37`	`+ else:`
	`38`	`+ recall = float(TP) / float(TP + FN)`
	`39`	`+`
	`40`	`+ if TP == 0 and FP == 0:`
	`41`	`+ precision = 0`
	`42`	`+ else:`
	`43`	`+ precision = float(TP) / float( TP + FP )`
	`44`	`+`
	`45`	`+ return (class_acc, recall, precision)`
	`46`	`+`
	`47`	`+def fitness_without_optimization(df1):`
	`48`	`+`
	`49`	`+ # Separate labels and features`
	`50`	`+ X = df1.drop(columns=['diagnosis'])`
	`51`	`+ y = df1['diagnosis']`
	`52`	`+`
	`53`	`+ # Convert the M to 1 and B to 0`
	`54`	`+ label = LabelEncoder()`
	`55`	`+ y = label.fit_transform(y)`
	`56`	`+ y[:20]`
	`57`	`+`
	`58`	`+ # Spilt the train and test data`
	`59`	`+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)`
	`60`	`+ # we used 30% test data`
	`61`	`+`
	`62`	`+ # Logistic Regression`
	`63`	`+ LR = LogisticRegression()`
	`64`	`+ LR.fit(X_train, y_train)`
	`65`	`+ LR.score(X_train, y_train)`
	`66`	`+ y_pred = LR.predict(X_test)`
	`67`	`+ y_pred_train = LR.predict(X_train)`
	`68`	`+`
	`69`	`+ # find accuracy`
	`70`	`+ ac = accuracy_score(y_test, y_pred)`
	`71`	`+ ac_train = accuracy_score(y_train, y_pred_train)`
	`72`	`+ # Code for ROC_AUC curve`
	`73`	`+ rc = roc_auc_score(y_test, y_pred)`
	`74`	`+`
	`75`	`+ cm_2 = confusion_matrix(y_test, y_pred)`
	`76`	`+`
	`77`	`+ sns.heatmap(cm_2,annot=True,fmt="d")`
	`78`	`+`
	`79`	`+ class_acc = classification_accuracy(y_test, y_pred)`
	`80`	`+`
	`81`	`+ return class_acc`
	`82`	`+`
	`83`	`+df = pd.read_csv('breast_cancer_data.csv')`
	`84`	`+accuracy = fitness_without_optimization(df.copy())`
	`85`	`+print('Accuracy :' + "{:.2f}".format(accuracy[0]))`
	`86`	`+print('Precision :' + "{:.2f}".format(accuracy[1]))`
	`87`	`+print('Recall :' + "{:.2f}".format(accuracy[2]))`
	`88`	`+`
	`89`	`+class PSO:`
	`90`	`+ def __init__(self, f_count, df):`
	`91`	`+`
	`92`	`+ self.df = df.copy() # data`
	`93`	`+ self.f_count = f_count # Feature count`
	`94`	`+ self.pos_act = [] # Actual Positions radmon prob`
	`95`	`+ self.position = [] # Position prob > 0.5 set as 1 or 0`
	`96`	`+ self.velocity = [] # Velocity random between -1 and 1`
	`97`	`+ self.pos_best = [] # best position`
	`98`	`+ self.y_actual = [] # Y actual`
	`99`	`+ self.y_predict= [] # Y test predicted`
	`100`	`+ self.fit_best = (-1, -1, -1) # best fit accuracy, Recall, Precision`
	`101`	`+ self.fitness = (-1, -1, -1) # accuracy , recall, precsion`
	`102`	`+`
	`103`	`+ self.initialize(f_count)`
	`104`	`+`
	`105`	`+ def initialize(self, f_count):`
	`106`	`+ self.f_count = f_count`
	`107`	`+ self.initalize_position(f_count)`
	`108`	`+ self.initialize_velocity(f_count)`
	`109`	`+`
	`110`	`+ def set_data(self,data):`
	`111`	`+ self.df = data.copy()`
	`112`	`+ print(self.df.head())`
	`113`	`+`
	`114`	`+ #Initialize the positions > 0.5 is set as 1`
	`115`	`+ def initalize_position(self,f_count):`
	`116`	`+ self.pos_act = np.random.uniform(low=0, high=1, size=f_count).tolist()`
	`117`	`+ self.position = [1 if po > 0.5 else 0 for po in self.pos_act]`
	`118`	`+`
	`119`	`+ def initialize_velocity(self, f_count):`
	`120`	`+ self.velocity = np.random.uniform(low=-1, high=1, size=f_count).tolist()`
	`121`	`+`
	`122`	`+ def drop_columns(self, X):`
	`123`	`+`
	`124`	`+ for iteration, value in enumerate(self.position):`
	`125`	`+ if value == 0 :`
	`126`	`+ X_1 = X.drop(X.columns[iteration], axis = 1)`
	`127`	`+ return X_1`
	`128`	`+`
	`129`	`+ def classification_accuracy(self,y_actual, y_hat):`
	`130`	`+ TP = 0`
	`131`	`+ FP = 0`
	`132`	`+ TN = 0`
	`133`	`+ FN = 0`
	`134`	`+`
	`135`	`+ for i in range(len(y_hat)):`
	`136`	`+ if y_actual[i]==y_hat[i]==1:`
	`137`	`+ TP += 1`
	`138`	`+ if y_hat[i]==1 and y_actual[i]!=y_hat[i]:`
	`139`	`+ FP += 1`
	`140`	`+ if y_actual[i]==y_hat[i]==0:`
	`141`	`+ TN += 1`
	`142`	`+ if y_hat[i]==0 and y_actual[i]!=y_hat[i]:`
	`143`	`+ FN += 1`
	`144`	`+`
	`145`	`+ class_acc = float((TP+TN)) / float((TP+FP+TN+FN))`
	`146`	`+`
	`147`	`+ if TP == 0 and FN == 0 :`
	`148`	`+ recall = 0`
	`149`	`+ else:`
	`150`	`+ recall = float(TP) / float(TP + FN)`
	`151`	`+ if TP == 0 and FP == 0:`
	`152`	`+ precision = 0`
	`153`	`+ else:`
	`154`	`+ precision = float(TP) / float( TP + FP )`
	`155`	`+`
	`156`	`+ return (class_acc, recall, precision)`
	`157`	`+`
	`158`	`+ def process_data(self):`
	`159`	`+`
	`160`	`+ # Separate labels and features`
	`161`	`+ X = self.df.drop(columns=['diagnosis'])`
	`162`	`+ y = self.df['diagnosis']`
	`163`	`+`
	`164`	`+ X = self.drop_columns(X)`
	`165`	`+`
	`166`	`+ # Convert the M to 1 and B to 0`
	`167`	`+ label = LabelEncoder()`
	`168`	`+ y = label.fit_transform(y)`
	`169`	`+ y[:20]`
	`170`	`+`
	`171`	`+ # Spilt the train and test data`
	`172`	`+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)`
	`173`	`+ # we used 30% test data`
	`174`	`+ # check the size before beginning`
	`175`	`+ X_train.shape, X_test.shape, y_train.shape, y_test.shape`
	`176`	`+`
	`177`	`+ # Logistic Regression`
	`178`	`+ LR = LogisticRegression()`
	`179`	`+ LR.fit(X_train, y_train)`
	`180`	`+ LR.score(X_train, y_train)`
	`181`	`+ y_pred = LR.predict(X_test)`
	`182`	`+ y_pred_train = LR.predict(X_train)`
	`183`	`+`
	`184`	`+ # find accuracy`
	`185`	`+ ac = accuracy_score(y_test, y_pred)`
	`186`	`+ ac_train = accuracy_score(y_train, y_pred_train)`
	`187`	`+ # Code for ROC_AUC curve`
	`188`	`+ rc = roc_auc_score(y_test, y_pred)`
	`189`	`+`
	`190`	`+ class_acc = self.classification_accuracy(y_test, y_pred)`
	`191`	`+`
	`192`	`+ self.y_actual = y_test`
	`193`	`+ self.y_predict = y_pred`
	`194`	`+`
	`195`	`+ return class_acc`
	`196`	`+`
	`197`	`+ # fitness check, checks accuarcy and precision and accurarcy`
	`198`	`+ def fitness_check(self,fitness, fit_best):`
	`199`	`+ is_fitness = False`
	`200`	`+`
	`201`	`+ if fitness[0] > fit_best[0] or fit_best[0] == -1:`
	`202`	`+ if fitness[1] >= fit_best[1] and fitness[2] >= fit_best[2]:`
	`203`	`+ is_fitness = True`
	`204`	`+`
	`205`	`+ return is_fitness`
	`206`	`+`
	`207`	`+ def evaluate_fitness(self):`
	`208`	`+ self.fitness = self.process_data()`
	`209`	`+`
	`210`	`+ if self.fitness_check(self.fitness, self.fit_best):`
	`211`	`+ self.pos_best = self.position.copy()`
	`212`	`+ self.fit_best = self.fitness`
	`213`	`+`
	`214`	`+ def update_velocity(self, pos_best_global):`
	`215`	`+ c1 = 1`
	`216`	`+ c2 = 2`
	`217`	`+ w = 0.5`
	`218`	`+`
	`219`	`+ for i in range(0, self.f_count):`
	`220`	`+ r1 = np.random.uniform(low=-1, high=1, size=1)[0]`
	`221`	`+ r2 = np.random.uniform(low=-1, high=1, size=1)[0]`
	`222`	`+ velocity_cog = c1r1(self.pos_best[i]-self.position[i])`
	`223`	`+ velocity_soc = c2r2(pos_best_global[i]-self.position[i])`
	`224`	`+`
	`225`	`+ self.velocity[i]=w*self.velocity[i]+velocity_cog+velocity_soc`
	`226`	`+`
	`227`	`+ def update_position(self):`
	`228`	`+`
	`229`	`+ for i in range(0, self.f_count):`
	`230`	`+ self.pos_act[i] = self.pos_act[i] + self.velocity[i]`
	`231`	`+`
	`232`	`+ #adjust max value`
	`233`	`+`
	`234`	`+ if self.pos_act[i] > 1 :`
	`235`	`+ self.pos_act[i] = 0.9`
	`236`	`+`
	`237`	`+ if self.pos_act[i] < 0 :`
	`238`	`+ self.pos_act[i] = 0.0`
	`239`	`+`
	`240`	`+ self.position[i] = 1 if self.pos_act[i] > 0.5 else 0`
	`241`	`+`
	`242`	`+ def print_position(self):`
	`243`	`+ print(self.position)`
	`244`	`+`
	`245`	`+ def print_velocity(self):`
	`246`	`+ print(self.velocity)`
	`247`	`+`
	`248`	`+def pso_calculate(f_count, df):`
	`249`	`+ y_actual = []`
	`250`	`+ y_predict = []`
	`251`	`+ fitness_best_g = (-1, -1, -1)`
	`252`	`+ pos_fitness_g = []`
	`253`	`+ swarm = []`
	`254`	`+ no_population = 400`
	`255`	`+ iteration = 1`
	`256`	`+`
	`257`	`+ for i in range(0,no_population):`
	`258`	`+ swarm.append(PSO(f_count, df))`
	`259`	`+`
	`260`	`+ while iteration <= 10:`
	`261`	`+`
	`262`	`+ print('\nIteration : ', iteration)`
	`263`	`+`
	`264`	`+ for pos in range(0, no_population):`
	`265`	`+`
	`266`	`+ swarm[pos].evaluate_fitness()`
	`267`	`+`
	`268`	`+ #check current particle is the global best`
	`269`	`+ if swarm[pos].fitness_check(swarm[pos].fitness, fitness_best_g): #swarm[pos].fitness > fitness_best_g or fitness_best_g == -1:`
	`270`	`+ pos_fitness_g = list(swarm[pos].position)`
	`271`	`+ fitness_best_g = (swarm[pos].fitness)`
	`272`	`+ y_actual = swarm[pos].y_actual`
	`273`	`+ y_predict = swarm[pos].y_predict`
	`274`	`+`
	`275`	`+ for pos in range(0, no_population):`
	`276`	`+ swarm[pos].update_velocity(pos_fitness_g)`
	`277`	`+ swarm[pos].update_position()`
	`278`	`+`
	`279`	`+ print(pos_fitness_g)`
	`280`	`+ print(fitness_best_g)`
	`281`	`+ iteration+=1`
	`282`	`+`
	`283`	`+`
	`284`	`+ print('\n Final Solution:')`
	`285`	`+ print(pos_fitness_g)`
	`286`	`+ print(fitness_best_g)`
	`287`	`+ cm_2 = confusion_matrix(y_actual, y_predict)`
	`288`	`+ sns.heatmap(cm_2,annot=True,fmt="d")`
	`289`	`+`
	`290`	`+pso_calculate(30,df)`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit c60b768

File tree

2 files changed

2 files changed

`‎Algorithms/Searching/PSO Feature Selection.py`

0 commit comments