Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 54afff2

Browse files
Create K-NearestNeighbors.py
1 parent 2ca6740 commit 54afff2

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

‎K-NearestNeighbors.py‎

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Jun 21 14:06:04 2015
4+
5+
@author: Pavitrakumar
6+
Credits: Jason Brownlee[Machinelearningmastery.com]
7+
"""
8+
9+
from __future__ import division
10+
import numpy as np
11+
from sklearn import datasets
12+
from sklearn import cross_validation
13+
from sklearn.metrics import mean_squared_error
14+
import math
15+
import operator
16+
17+
"""
18+
Euclidean distance measure: This is defined as the square root of the sum of the
19+
squared differences between the two arrays of numbers
20+
"""
21+
22+
def euclideanDistance(instance1, instance2, no_of_features):
    """Return the Euclidean distance between two feature vectors.

    Only the first ``no_of_features`` components of each vector are compared.

    Args:
        instance1: Indexable sequence of numeric feature values.
        instance2: Indexable sequence of numeric feature values.
        no_of_features: Number of leading components to include.

    Returns:
        The square root of the summed squared differences, as a float
        (0.0 when ``no_of_features`` is 0).

    Raises:
        IndexError: If either vector has fewer than ``no_of_features`` items.
    """
    squared_sum = 0.0
    for x in range(no_of_features):
        # Convert to float before subtracting: fixed-width unsigned values
        # (e.g. numpy uint8) would otherwise wrap around on subtraction and
        # produce a wrong distance.
        diff = float(instance1[x]) - float(instance2[x])
        squared_sum += diff * diff
    return math.sqrt(squared_sum)
27+
28+
"""
29+
getNeighbors function returns k most similar neighbors from the training set
30+
for a given test instance (using the already defined euclideanDistance function)
31+
"""
32+
def getNeighbors(X_train, y_train, test_instance, k):
    """Return the responses of the k training examples nearest to test_instance.

    Distances are measured with euclideanDistance over all features of
    ``test_instance``.

    Args:
        X_train: Iterable of training feature vectors.
        y_train: Iterable of responses, paired element-wise with ``X_train``.
        test_instance: Feature vector to find neighbors for.
        k: Number of neighbors to return; must be at least 1.

    Returns:
        A list of at most ``k`` responses, ordered from nearest to farthest.
        Fewer than ``k`` are returned when the training set is smaller.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    # A negative k would slice from the end of the sorted list and silently
    # return the wrong number of neighbors, so reject it up front.
    if k < 1:
        raise ValueError("k must be a positive integer, got %r" % (k,))

    no_of_features = len(test_instance)
    # Pair every training response with its distance to the test instance.
    distances = [
        (y, euclideanDistance(test_instance, x, no_of_features))
        for x, y in zip(X_train, y_train)
    ]
    # Sort by the 2nd element of each pair, i.e. by distance.
    distances.sort(key=operator.itemgetter(1))
    # Only the responses of the k closest examples are needed.
    return [response for response, _ in distances[:k]]
48+
"""
49+
getResponse returns the most commonly occurring class in the given set of neighbors
50+
"""
51+
def getResponse(neighbors):
    """Return the most frequently occurring class label among ``neighbors``.

    Labels may be any hashable, mutually orderable values (for example
    negative integers or strings), not only non-negative integers.

    Args:
        neighbors: Iterable of class labels, typically of length k.

    Returns:
        The label with the highest count. When several labels tie, the
        smallest one is returned.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    # Imported locally so the block does not depend on a file-level import.
    from collections import Counter

    counts = Counter(neighbors)
    if not counts:
        raise ValueError("neighbors must contain at least one label")
    top = max(counts.values())
    # Break ties by picking the smallest label among the most frequent ones.
    return min(label for label, count in counts.items() if count == top)
57+
58+
def predict(X_test, X_train, y_train, k=5):
    """Predict a class for every row of X_test with k-nearest neighbors.

    For each test row, the k closest training examples are looked up with
    getNeighbors and their responses are reduced to one label by getResponse.

    Args:
        X_test: Iterable of feature vectors to classify.
        X_train: Iterable of training feature vectors.
        y_train: Iterable of training responses, paired with ``X_train``.
        k: Number of neighbors consulted per test row (default 5).

    Returns:
        A list with one predicted label per row of ``X_test``, in order.
    """
    return [
        getResponse(getNeighbors(X_train, y_train, row, k))
        for row in X_test
    ]
64+
65+
66+
"""
67+
testing using IRIS data set
68+
"""
69+
70+
# Load the Iris data set: X holds the feature rows, y the class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# NOTE(review): test_size=0.8 holds out 80% of the samples for testing, so
# only 20% is used for training -- confirm this split is intended.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# newer versions provide train_test_split in sklearn.model_selection.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.8)

# Classify every held-out row using the default k of predict.
pred = predict(X_test, X_train, y_train)

from sklearn.metrics import accuracy_score

# The parenthesized form works both as the Python 2 print statement and as
# the Python 3 print function; the bare statement form is a syntax error on 3.
print(accuracy_score(y_test, pred))

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /