Commit 54afff2

committed

Create K-NearestNeighbors.py

1 parent 2ca6740 commit 54afff2Copy full SHA for 54afff2

File tree

1 file changed

+81

-0

lines changed

K-NearestNeighbors.py

1 file changed

+81

-0

lines changed

`‎K-NearestNeighbors.py‎`

Lines changed: 81 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,81 @@`
	`1`	`+# -- coding: utf-8 --`
	`2`	`+"""`
	`3`	`+Created on Sun Jun 21 14:06:04 2015`
	`4`	`+`
	`5`	`+@author: Pavitrakumar`
	`6`	`+Credits: Jason Brownlee[Machinelearningmastery.com]`
	`7`	`+"""`
	`8`	`+`
	`9`	`+from __future__ import division`
	`10`	`+import numpy as np`
	`11`	`+from sklearn import datasets`
	`12`	`+from sklearn import cross_validation`
	`13`	`+from sklearn.metrics import mean_squared_error`
	`14`	`+import math`
	`15`	`+import operator`
	`16`	`+`
	`17`	`+"""`
	`18`	`+Euclidean distance measure: This is defined as the square root of the sum of the`
	`19`	`+squared differences between the two arrays of numbers`
	`20`	`+"""`
	`21`	`+`
	`22`	`+def euclideanDistance(instance1, instance2, no_of_features):`
	`23`	`+ distance = 0`
	`24`	`+ for x in range(no_of_features):`
	`25`	`+ distance += pow((instance1[x] - instance2[x]), 2)`
	`26`	`+ return math.sqrt(distance)`
	`27`	`+`
	`28`	`+"""`
	`29`	`+getNeighbors function returns k most similar neighbors from the training set`
	`30`	`+for a given test instance (using the already defined euclideanDistance function)`
	`31`	`+"""`
	`32`	`+def getNeighbors(X_train,y_train, test_instance, k):`
	`33`	`+ # getting the k-nearest neighbors of the data point testInsatance`
	`34`	`+ distances = []`
	`35`	`+ no_of_features = len(test_instance)`
	`36`	`+ for x,y in zip(X_train,y_train):`
	`37`	`+ # we are finding distance from each training example to out testInstance data point`
	`38`	`+ # and storing it as a list of pairs i.e (ith training example's response,distance to our instance data point)`
	`39`	`+ dist = euclideanDistance(test_instance, x, no_of_features)`
	`40`	`+ distances.append((y, dist))`
	`41`	`+ distances.sort(key=operator.itemgetter(1))`
	`42`	`+ #sorting the list by the 2nd element in each pair - sorting by distance`
	`43`	`+ #extracting the top k elements from the sorted list`
	`44`	`+ #we only need the response`
	`45`	`+ neighbors = [response for (response,distance) in distances]`
	`46`	`+ neighbors = neighbors[0:k]`
	`47`	`+ return neighbors`
	`48`	`+"""`
	`49`	`+getResponse just returns the most commonly occurring class in the given set of neighbors`
	`50`	`+"""`
	`51`	`+def getResponse(neighbors):`
	`52`	`+ # neighbors is a vector of length k`
	`53`	`+ # now, all we need to do is to find the most occuring class`
	`54`	`+ counts = np.bincount(neighbors)`
	`55`	`+ max_count = np.argmax(counts)`
	`56`	`+ return max_count`
	`57`	`+`
	`58`	`+def predict(X_test,X_train,y_train,k = 5):`
	`59`	`+ predicted = []`
	`60`	`+ for each_test_instance in X_test:`
	`61`	`+ neighbors = getNeighbors(X_train,y_train,each_test_instance,k)`
	`62`	`+ predicted.append(getResponse(neighbors))`
	`63`	`+ return predicted`
	`64`	`+`
	`65`	`+`
	`66`	`+"""`
	`67`	`+testing using IRIS data set`
	`68`	`+"""`
	`69`	`+`
	`70`	`+iris = datasets.load_iris()`
	`71`	`+X = iris.data`
	`72`	`+y = iris.target`
	`73`	`+`
	`74`	`+X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.8)`
	`75`	`+`
	`76`	`+`
	`77`	`+pred = predict(X_test,X_train,y_train)`
	`78`	`+`
	`79`	`+from sklearn.metrics import accuracy_score`
	`80`	`+`
	`81`	`+print accuracy_score(y_test,pred)`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 54afff2

File tree

1 file changed

1 file changed

`‎K-NearestNeighbors.py‎`

0 commit comments