|
# Demo: cluster synthetic 2-D blobs with K-Means, plot the result, and compare
# the Calinski-Harabasz score of the predicted labels against the true labels.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn import metrics


# sample_dots holds the sample features and cluster_id the true cluster label
# of each sample: 1000 samples, 2 features each, 4 clusters centered at
# [-1, -1], [0, 0], [1, 1] and [2, 2], each with standard deviation 0.5.
sample_dots, cluster_id = make_blobs(
    n_samples=1000,
    n_features=2,
    centers=[[-1, -1], [0, 0], [1, 1], [2, 2]],
    cluster_std=[0.5, 0.5, 0.5, 0.5],
    random_state=9,
)
x_location = sample_dots[:, 0]
y_location = sample_dots[:, 1]

# NOTE(review): this unlabelled scatter shares its axes with the colored
# scatter below, which is drawn over the same points. If two separate plots
# were intended, a plt.figure() call is needed in between -- confirm intent.
plt.scatter(x_location, y_location, marker='o')


# Single random initialisation with a fixed seed so the run is reproducible.
kmeans = KMeans(init='random', n_init=1, n_clusters=4, random_state=9)
pred_cluster_id = kmeans.fit_predict(sample_dots)
plt.scatter(x_location, y_location, c=pred_cluster_id)

# fit_predict() above already fitted the estimator, so the centroids can be
# read directly; refitting on the same data with the same seed would only
# repeat the work.
centroids = kmeans.cluster_centers_
x_centroid = centroids[:, 0]
y_centroid = centroids[:, 1]
plt.scatter(x_centroid, y_centroid, s=400, marker='*')

plt.show()

# Calinski-Harabasz index: score1 uses the K-Means labels, score2 the
# ground-truth labels returned by make_blobs.
score1 = metrics.calinski_harabasz_score(sample_dots, pred_cluster_id)
score2 = metrics.calinski_harabasz_score(sample_dots, cluster_id)
print(score1)
print(score2)
0 commit comments