Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c60b768

Browse files
Merge pull request #84 from DDSSS07/PSO_FS
Feature Selection using PSO ( Particle Swarm Optimization )
2 parents d6fb189 + 340d9c7 commit c60b768

File tree

2 files changed

+860
-0
lines changed

2 files changed

+860
-0
lines changed
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
import numpy as np
2+
import pandas as pd
3+
import seaborn as sns
4+
from random import random
5+
from sklearn import metrics
6+
from sklearn.preprocessing import LabelEncoder
7+
from sklearn.model_selection import train_test_split
8+
from sklearn.model_selection import cross_validate
9+
from sklearn.linear_model import LogisticRegression
10+
from sklearn.metrics import confusion_matrix, make_scorer
11+
from sklearn.metrics import roc_auc_score, accuracy_score
12+
from sklearn.metrics import precision_score, recall_score
13+
14+
import warnings
15+
warnings.filterwarnings('ignore')
16+
17+
def classification_accuracy(y_actual, y_hat):
    """Compute accuracy, recall and precision for binary 0/1 labels.

    Args:
        y_actual: Sequence of true labels (1 = positive, 0 = negative).
        y_hat: Sequence of predicted labels, same length as ``y_actual``.

    Returns:
        Tuple ``(accuracy, recall, precision)`` -- note that recall comes
        before precision. A ratio whose denominator is zero is reported as
        ``0.0``, so empty input yields ``(0.0, 0.0, 0.0)``. Pairs whose
        prediction is neither 0 nor 1 are not counted.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(y_actual) != len(y_hat):
        raise ValueError(
            "y_actual and y_hat must have the same length "
            "(got {} and {})".format(len(y_actual), len(y_hat))
        )

    tp = fp = tn = fn = 0

    # zip pairs the values positionally, so a pandas Series whose index is
    # not 0..n-1 is handled the same way as a list or a numpy array.
    for actual, predicted in zip(y_actual, y_hat):
        if predicted == 1:
            if actual == 1:
                tp += 1
            else:
                fp += 1
        elif predicted == 0:
            if actual == 0:
                tn += 1
            else:
                fn += 1

    counted = tp + fp + tn + fn
    actual_positives = tp + fn
    predicted_positives = tp + fp

    # Guard every ratio: a zero denominator means "nothing to measure".
    class_acc = float(tp + tn) / counted if counted else 0.0
    recall = float(tp) / actual_positives if actual_positives else 0.0
    precision = float(tp) / predicted_positives if predicted_positives else 0.0

    return (class_acc, recall, precision)
46+
47+
def fitness_without_optimization(df1):
    """Score a logistic-regression baseline that uses every feature column.

    The frame is split 70/30 into train and test parts, a
    ``LogisticRegression`` is fitted on the training part, and the test
    predictions are scored. A confusion-matrix heatmap of the test
    predictions is drawn with seaborn as a side effect.

    Args:
        df1: DataFrame holding the feature columns plus a ``'diagnosis'``
            label column.

    Returns:
        Tuple ``(accuracy, recall, precision)`` as produced by
        ``classification_accuracy`` for the test split.
    """
    # Separate labels and features
    X = df1.drop(columns=['diagnosis'])

    # Encode the class labels as integers
    # (presumably 'B' -> 0 and 'M' -> 1 -- confirm against the data file)
    y = LabelEncoder().fit_transform(df1['diagnosis'])

    # Split the train and test data; 30% is held out for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Logistic Regression
    model = LogisticRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Only the confusion matrix and the accuracy/recall/precision tuple are
    # consumed, so no other metric is computed here.
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d")

    return classification_accuracy(y_test, y_pred)
82+
83+
# Baseline run: score logistic regression on the full feature set.
df = pd.read_csv('breast_cancer_data.csv')
accuracy = fitness_without_optimization(df.copy())

# The result tuple is ordered (accuracy, recall, precision), so precision is
# at index 2 and recall at index 1.
print('Accuracy :' + "{:.2f}".format(accuracy[0]))
print('Precision :' + "{:.2f}".format(accuracy[2]))
print('Recall :' + "{:.2f}".format(accuracy[1]))
88+
89+
class PSO:
    """One particle of a binary particle swarm used for feature selection.

    A particle keeps a continuous position per feature (``pos_act``) and a
    binary mask derived from it (``position``: 1 keeps the feature, 0 drops
    it). Its fitness is the ``(accuracy, recall, precision)`` of a logistic
    regression trained on the kept features.
    """

    def __init__(self, f_count, df):
        """Create a particle with a random position and velocity.

        Args:
            f_count: Number of candidate features (length of the mask).
            df: DataFrame with the feature columns and a ``'diagnosis'``
                label column; the particle stores its own copy.
        """
        self.df = df.copy()             # data
        self.f_count = f_count          # feature count
        self.pos_act = []               # continuous positions, random in [0, 1)
        self.position = []              # binary mask: 1 where pos_act > 0.5
        self.velocity = []              # velocities, random in [-1, 1)
        self.pos_best = []              # best mask seen by this particle
        self.y_actual = []              # test labels of the last evaluation
        self.y_predict = []             # test predictions of the last evaluation
        self.fit_best = (-1, -1, -1)    # best (accuracy, recall, precision)
        self.fitness = (-1, -1, -1)     # last (accuracy, recall, precision)

        self.initialize(f_count)

    def initialize(self, f_count):
        """Reset the feature count and draw a fresh position and velocity."""
        self.f_count = f_count
        self.initalize_position(f_count)
        self.initialize_velocity(f_count)

    def set_data(self, data):
        """Replace the particle's data with a copy of ``data`` and print its head."""
        self.df = data.copy()
        print(self.df.head())

    # Initialize the positions; > 0.5 is set as 1
    def initalize_position(self, f_count):
        """Draw ``f_count`` uniform positions in [0, 1) and derive the mask."""
        self.pos_act = np.random.uniform(low=0, high=1, size=f_count).tolist()
        self.position = [1 if po > 0.5 else 0 for po in self.pos_act]

    def initialize_velocity(self, f_count):
        """Draw ``f_count`` uniform velocities in [-1, 1)."""
        self.velocity = np.random.uniform(low=-1, high=1, size=f_count).tolist()

    def drop_columns(self, X):
        """Return ``X`` without the columns whose mask entry is 0.

        Columns are matched to ``self.position`` by index. Every deselected
        column is removed, and ``X`` is returned with all its columns when
        the mask deselects nothing.

        Args:
            X: Feature DataFrame (label column already removed).

        Returns:
            A new DataFrame containing only the selected columns.
        """
        deselected = [
            X.columns[index]
            for index, flag in enumerate(self.position)
            if flag == 0
        ]
        return X.drop(columns=deselected)

    def classification_accuracy(self, y_actual, y_hat):
        """Compute accuracy, recall and precision for binary 0/1 labels.

        Args:
            y_actual: Sequence of true labels (1 = positive, 0 = negative).
            y_hat: Sequence of predicted labels, same length as ``y_actual``.

        Returns:
            Tuple ``(accuracy, recall, precision)``. A ratio whose
            denominator is zero is reported as ``0.0``, so empty input
            yields ``(0.0, 0.0, 0.0)``.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        if len(y_actual) != len(y_hat):
            raise ValueError(
                "y_actual and y_hat must have the same length "
                "(got {} and {})".format(len(y_actual), len(y_hat))
            )

        tp = fp = tn = fn = 0

        # zip pairs the values positionally, independent of any index.
        for actual, predicted in zip(y_actual, y_hat):
            if predicted == 1:
                if actual == 1:
                    tp += 1
                else:
                    fp += 1
            elif predicted == 0:
                if actual == 0:
                    tn += 1
                else:
                    fn += 1

        counted = tp + fp + tn + fn
        actual_positives = tp + fn
        predicted_positives = tp + fp

        class_acc = float(tp + tn) / counted if counted else 0.0
        recall = float(tp) / actual_positives if actual_positives else 0.0
        precision = float(tp) / predicted_positives if predicted_positives else 0.0

        return (class_acc, recall, precision)

    def process_data(self):
        """Train and score logistic regression on the selected features.

        The data is split 70/30, the model is fitted on the training part
        and scored on the test part. The test labels and predictions are
        stored in ``self.y_actual`` / ``self.y_predict``.

        Returns:
            Tuple ``(accuracy, recall, precision)`` for the test split, or
            ``(0.0, 0.0, 0.0)`` when the mask selects no feature at all.
        """
        # Separate labels and features
        X = self.df.drop(columns=['diagnosis'])
        y = self.df['diagnosis']

        X = self.drop_columns(X)

        # A model cannot be fitted on zero features; score such a mask as
        # the worst possible instead of failing the whole run.
        if X.shape[1] == 0:
            return (0.0, 0.0, 0.0)

        # Encode the class labels as integers
        # (presumably 'B' -> 0 and 'M' -> 1 -- confirm against the data file)
        y = LabelEncoder().fit_transform(y)

        # Split the train and test data; 30% is held out for testing
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

        # Logistic Regression
        model = LogisticRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        class_acc = self.classification_accuracy(y_test, y_pred)

        self.y_actual = y_test
        self.y_predict = y_pred

        return class_acc

    # fitness check: compares accuracy, recall and precision
    def fitness_check(self, fitness, fit_best):
        """Tell whether ``fitness`` should replace ``fit_best``.

        Args:
            fitness: Candidate ``(accuracy, recall, precision)``.
            fit_best: Current best tuple; an accuracy of ``-1`` marks
                "nothing recorded yet".

        Returns:
            True when the candidate accuracy is strictly higher (or no best
            exists yet) and neither recall nor precision is lower.
        """
        accuracy_improves = fitness[0] > fit_best[0] or fit_best[0] == -1
        return bool(
            accuracy_improves
            and fitness[1] >= fit_best[1]
            and fitness[2] >= fit_best[2]
        )

    def evaluate_fitness(self):
        """Evaluate the current mask and update the personal best if it wins."""
        self.fitness = self.process_data()

        if self.fitness_check(self.fitness, self.fit_best):
            self.pos_best = self.position.copy()
            self.fit_best = self.fitness

    def update_velocity(self, pos_best_global, c1=1, c2=2, w=0.5):
        """Update the velocity from the personal and the global best masks.

        Args:
            pos_best_global: Best mask found by the whole swarm.
            c1: Cognitive coefficient (pull toward the personal best).
            c2: Social coefficient (pull toward the global best).
            w: Inertia weight applied to the previous velocity.
        """
        for i in range(self.f_count):
            # r1/r2 are drawn from [0, 1) as in the canonical PSO update, so
            # both terms always point toward the respective best; a range
            # that includes negative values would push away from it as
            # often as toward it.
            r1 = np.random.uniform(low=0, high=1)
            r2 = np.random.uniform(low=0, high=1)
            velocity_cog = c1 * r1 * (self.pos_best[i] - self.position[i])
            velocity_soc = c2 * r2 * (pos_best_global[i] - self.position[i])

            self.velocity[i] = w * self.velocity[i] + velocity_cog + velocity_soc

    def update_position(self):
        """Move the particle by its velocity and refresh the binary mask."""
        for i in range(self.f_count):
            self.pos_act[i] = self.pos_act[i] + self.velocity[i]

            # Keep the continuous position inside the valid range.
            # NOTE(review): overshoot above 1 is reset to 0.9, not 1.0 --
            # confirm this is intentional.
            if self.pos_act[i] > 1:
                self.pos_act[i] = 0.9

            if self.pos_act[i] < 0:
                self.pos_act[i] = 0.0

            self.position[i] = 1 if self.pos_act[i] > 0.5 else 0

    def print_position(self):
        """Print the binary feature mask."""
        print(self.position)

    def print_velocity(self):
        """Print the velocity vector."""
        print(self.velocity)
247+
248+
def pso_calculate(f_count, df, no_population=400, max_iterations=10):
    """Search for a good feature subset with a particle swarm.

    Every iteration evaluates each particle, records the swarm-wide best
    mask, then moves every particle toward its personal best and the global
    best. Progress is printed per iteration, and a confusion-matrix heatmap
    of the best particle's test predictions is drawn at the end.

    Args:
        f_count: Number of candidate features (length of each mask).
        df: DataFrame with the feature columns and a ``'diagnosis'`` column.
        no_population: Number of particles in the swarm.
        max_iterations: Number of evaluate/move rounds to run.

    Returns:
        Tuple ``(best_mask, best_fitness)`` where ``best_mask`` is a list of
        0/1 flags and ``best_fitness`` is ``(accuracy, recall, precision)``.
        With an empty swarm or zero iterations these are ``[]`` and
        ``(-1, -1, -1)``.
    """
    y_actual = []
    y_predict = []
    fitness_best_g = (-1, -1, -1)
    pos_fitness_g = []

    swarm = [PSO(f_count, df) for _ in range(no_population)]

    for iteration in range(1, max_iterations + 1):

        print('\nIteration : ', iteration)

        for particle in swarm:
            particle.evaluate_fitness()

            # check whether the current particle is the global best
            if particle.fitness_check(particle.fitness, fitness_best_g):
                pos_fitness_g = list(particle.position)
                fitness_best_g = particle.fitness
                y_actual = particle.y_actual
                y_predict = particle.y_predict

        # Move the particles only after the whole swarm has been evaluated,
        # so every particle is pulled toward the same global best.
        for particle in swarm:
            particle.update_velocity(pos_fitness_g)
            particle.update_position()

        print(pos_fitness_g)
        print(fitness_best_g)

    print('\n Final Solution:')
    print(pos_fitness_g)
    print(fitness_best_g)

    # Nothing to plot when no particle was ever evaluated.
    if len(y_actual) > 0:
        cm_2 = confusion_matrix(y_actual, y_predict)
        sns.heatmap(cm_2, annot=True, fmt="d")

    return pos_fitness_g, fitness_best_g
289+
290+
# Run the swarm search over 30 candidate features of the loaded data.
pso_calculate(f_count=30, df=df)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /