Name	Name	Last commit message	Last commit date
Latest commit History 16 Commits
11222.png	11222.png
1_aOXjpKnAUdxYeu2_wJGtLA.png	1_aOXjpKnAUdxYeu2_wJGtLA.png
1_nqv9cVgNQpZDOJuz0Q6L0A.jpeg	1_nqv9cVgNQpZDOJuz0Q6L0A.jpeg
Classification Linear.ipynb	Classification Linear.ipynb
Classification Linear.md	Classification Linear.md
LICENSE	LICENSE
README.md	README.md
Screenshot from 2020年02月21日 16-03-01.png	Screenshot from 2020年02月21日 16-03-01.png
output_21_0.png	output_21_0.png
output_6_1.png	output_6_1.png

scratch-binary-linear-classification

Binary linear classification from scratch, using a sigmoid function trained by gradient descent.

Function sigmoid

Z est une fonction linéaire de la forme : $Z = X \cdot \theta$ ($\theta$ est l'ensemble des paramètres associés à chaque feature de notre dataset ; $X$ est donc une matrice contenant l'ensemble du dataset).

L'objectif de la sigmoïde, $\sigma(Z) = \frac{1}{1 + e^{-Z}}$, est de nous aider à trouver une frontière de décision.

png

Representation (0, 1)

La particularité de cette fonction est d'être comprise entre 0 et 1. png

À partir de cette fonction, on définit une frontière de décision au seuil de 0.5 comme ceci : y = 0 si sigmoid(X·θ) < 0.5 ; y = 1 si sigmoid(X·θ) ≥ 0.5.

Source : Guillaume - Machine Learnia

Import tools

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import pandas as pd
import seaborn as sns

Create data

# Fix NumPy's global seed, then generate a 200-sample, 2-feature toy dataset.
np.random.seed(1)
X, y = make_classification(
    n_samples=200,
    n_features=2,
    n_informative=1,
    n_redundant=0,
    n_clusters_per_class=1,
)

X.shape

(200, 2)

y.shape

(200,)

# Scatter the first feature against the second, colouring each point by its label.
plt.figure(figsize=(8, 6))
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y, marker='o', edgecolors='k')

<matplotlib.collections.PathCollection at 0x7f08b6ababa8>

png

X[:10,:]

array([[ 1.23225307, -2.47410149],
 [-0.10679399, 1.21417321],
 [-0.89055558, -2.67772702],
 [ 1.19268607, 0.56160925],
 [-0.8301719 , 1.15856878],
 [ 1.95487808, -1.44594526],
 [-0.19505734, 1.10997919],
 [-0.34498721, -0.91240907],
 [-0.23643695, 0.91176636],
 [-1.30653407, 0.71240572]])

from sklearn.model_selection import train_test_split
# Hold out 20% of the samples (test_size=0.2) for evaluation; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

X_train.shape

(160, 2)

X_test.shape

(40, 2)

Create model

class ClassificationLinearCustom():
    """
    Binary linear classifier trained by per-sample gradient steps on a sigmoid.

    The model has no intercept: the score of a sample is the sum of its
    features weighted by ``coefs_``, and the predicted class is 1 when the
    sigmoid of that score is at least 0.5.
    """

    def __init__(self, learning_rate=0.0001, max_iter=100):
        """
        Store the hyper-parameters and mark the model as not fitted.

        -- Parameters:
        - learning_rate : step size applied to every gradient update
        - max_iter : number of full passes over the training data
        """
        self.lrate = learning_rate
        self.max_iter = max_iter
        self.coefs_ = None           # one weight per feature, set by fit()
        self.error_gradient = None   # kept for compatibility; never written by this class
        self.is_fited = False

    def sigmoid(self, coefs, x):
        """
        Return the sigmoid of the weighted sum of one sample.

        -- Parameters:
        - coefs : weights, one per feature
        - x : feature values of a single sample
        """
        return 1 / (1 + np.exp(-(coefs * x).sum()))

    def error_coefs(self, coefs, xi, yi, m, lr):
        """
        Return the scaled gradient step contributed by one sample.

        -- Parameters:
        - coefs : current weights
        - xi : feature values of one sample
        - yi : true label of that sample
        - m : dataset size
        - lr : learning rate
        """
        # The residual (prediction - label) must be formed BEFORE multiplying
        # by the features; `xi * sigmoid - yi` would subtract the label from
        # every feature instead.
        residual = self.sigmoid(coefs, xi) - yi
        return lr / m * (xi.T * residual)

    def fit(self, x, y):
        """
        Learn the weights from training data and print the model parameters.

        -- Parameters:
        - x : sample data, 2-D array of shape (n_samples, n_features)
        - y : true labels, one per sample

        -- Raises:
        - ValueError if x is not 2-D, is empty, or x and y differ in length
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)

        # Validate before any arithmetic so bad input fails loudly.
        if x.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim == 0 or x.shape[0] != y.shape[0]:
            raise ValueError("x and y must have same sample")
        if x.size == 0:
            raise ValueError("x must contain at least one sample and one feature")

        # Min-max scaling over the whole matrix (a single global min and max).
        # NOTE(review): predict() does not apply this scaling to its input.
        low, high = x.min(), x.max()
        if high > low:  # constant data: skip scaling rather than divide by zero
            x = (x - low) / (high - low)

        m = x.shape[0]  # dataset size
        # One weight per feature, all starting at 1.
        self.coefs_ = np.ones(x.shape[1])
        for _ in range(self.max_iter):
            for xi, yi in zip(x, y):
                self.coefs_ -= self.error_coefs(self.coefs_, xi, yi, m, self.lrate)

        self.is_fited = True
        print('ClassificationLinearCustom(learning_rate={}, max_iter={})'.format(self.lrate, self.max_iter))

    def predict(self, x):
        """
        Predict a class label (0 or 1) for every row of x.

        -- Parameters:
        - x : sample data to predict, 2-D array with one column per trained feature

        -- Returns:
        - array of 0/1 labels, one per row of x

        -- Raises:
        - ValueError if the model is not fitted or the feature count differs
        """
        if not self.is_fited:
            raise ValueError("model must be fitted before calling predict")
        x = np.asarray(x)
        if x.ndim != 2 or x.shape[1] != self.coefs_.shape[0]:
            raise ValueError("The features of x do not have the same size as those to train")
        # Class 1 when the sigmoid reaches the 0.5 threshold, else class 0.
        labels = [1 if self.sigmoid(self.coefs_, xi) >= 0.5 else 0 for xi in x]
        return np.array(labels, dtype=int)

Test Model Custom

cf = ClassificationLinearCustom()

cf.fit(X_train, y_train)

ClassificationLinearCustom(learning_rate=0.0001, max_iter=100)

cf.coefs_

array([1.00140568, 1.00136496])

from sklearn.metrics import accuracy_score

yp = cf.predict(X_test)

yp

array([1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1])

Decision boundary training

# Draw the custom model's decision regions with the training points on top.
h = .02  # step of the evaluation grid
colors = "bry"  # one colour letter per class; only the first two are consumed below
# Pad the training range by one unit on each side of both axes.
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict a class for every grid node, then restore the grid shape for contourf.
Z = cf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.axis('tight')
for i, color in zip([0, 1], colors):
    # A boolean mask selects the class rows directly and keeps the slices 1-D.
    idx = y_train == i
    # `c` is a single named colour, so no colormap is passed: matplotlib would
    # ignore it and emit a warning.
    plt.scatter(X_train[idx, 0], X_train[idx, 1], c=color, edgecolor='black', s=20)

png

from sklearn.metrics import classification_report, f1_score, confusion_matrix

print(f1_score(y_test, yp))

0.92

# Flatten the confusion matrix into its four cells and print each with its label.
tn, fp, fn, tp = confusion_matrix(y_test, yp).ravel()
for label, count in (
    ('True Negative :', tn),
    ('False Positive :', fp),
    ('False Negative :', fn),
    ('True Positive :', tp),
):
    print(label, count)

True Negative : 13
False Positive : 4
False Negative : 0
True Positive : 23

print(classification_report(y_test, yp))

 precision recall f1-score support
 0 1.00 0.76 0.87 17
 1 0.85 1.00 0.92 23
 accuracy 0.90 40
 macro avg 0.93 0.88 0.89 40
weighted avg 0.91 0.90 0.90 40

Model Sklearn

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.svm import SVC

cfk = KNeighborsClassifier()

cfk.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
 weights='uniform')

f1_score(y_test, cfk.predict(X_test))

0.9777777777777777

# Flatten the k-neighbours confusion matrix into its four cells and print each one.
tn, fp, fn, tp = confusion_matrix(y_test, cfk.predict(X_test)).ravel()
for label, count in (
    ('True Negative :', tn),
    ('False Positive :', fp),
    ('False Negative :', fn),
    ('True Positive :', tp),
):
    print(label, count)

True Negative : 17
False Positive : 0
False Negative : 1
True Positive : 22

print(classification_report(y_test, cfk.predict(X_test)))

 precision recall f1-score support
 0 0.94 1.00 0.97 17
 1 1.00 0.96 0.98 23
 accuracy 0.97 40
 macro avg 0.97 0.98 0.97 40
weighted avg 0.98 0.97 0.98 40

cfks = SGDClassifier()

cfks.fit(X_train, y_train)

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
 early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
 l1_ratio=0.15, learning_rate='optimal', loss='hinge',
 max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
 power_t=0.5, random_state=None, shuffle=True, tol=0.001,
 validation_fraction=0.1, verbose=0, warm_start=False)

f1_score(y_test, cfks.predict(X_test))

0.9565217391304348

# Flatten the SGD classifier's confusion matrix into its four cells and print each one.
tn, fp, fn, tp = confusion_matrix(y_test, cfks.predict(X_test)).ravel()
for label, count in (
    ('True Negative :', tn),
    ('False Positive :', fp),
    ('False Negative :', fn),
    ('True Positive :', tp),
):
    print(label, count)

True Negative : 16
False Positive : 1
False Negative : 1
True Positive : 22

print(classification_report(y_test, cfks.predict(X_test)))

 precision recall f1-score support
 0 0.94 0.94 0.94 17
 1 0.96 0.96 0.96 23
 accuracy 0.95 40
 macro avg 0.95 0.95 0.95 40
weighted avg 0.95 0.95 0.95 40

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

License

Uh oh!

hadpro24/scratch-binary-linear-classification

Folders and files

Latest commit

History

Repository files navigation

scratch-binary-linear-classification

Function sigmoid

Representation (0, 1)

Import tools

Create data

Create model

Test Model Custom

Decision boundary training

Model Sklearn

About

Topics

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages

Languages

License

hadpro24/scratch-binary-linear-classification

Folders and files

Latest commit

History

Repository files navigation

scratch-binary-linear-classification

Function sigmoid

Representation (0, 1)

Import tools

Create data

Create model

Test Model Custom

Decision boundary training

Model Sklearn

About

Topics

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages