Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4026f9e

Browse files
更新第三章:数据科学-sklearn, cluster。
1 parent 4e1714f commit 4026f9e

File tree

14 files changed

+1571
-720
lines changed

14 files changed

+1571
-720
lines changed

‎03_data_science/03_scikit-learn/kmeans/plot_cluster_iris.ipynb‎

Lines changed: 12 additions & 143 deletions
Large diffs are not rendered by default.

‎03_data_science/03_scikit-learn/kmeans/plot_color_quantization.ipynb‎

Lines changed: 6 additions & 168 deletions
Large diffs are not rendered by default.

‎03_data_science/03_scikit-learn/kmeans/plot_kmeans_stability_low_dim_dense.ipynb‎

Lines changed: 12 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": null,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline"
@@ -42,50 +40,21 @@
4240
{
4341
"cell_type": "markdown",
4442
"metadata": {},
45-
"source": [
46-
"基于经验的k-means初始化方法\n",
47-
"\n",
48-
"评估k-均值初始化的能力,以使算法收敛稳健,如通过聚类惯性的相对标准偏差(即到最近聚类中心的平方距离之和)测量的。\n",
49-
"\n",
50-
"第一个图显示了最佳初始化参数(``KMeans`` or ``MiniBatchKMeans``)和init方法(``init=\"random\"`` or ``init=\"kmeans++\"``)的选择。\n",
51-
"\n",
52-
"第二个图显示了使用``init=\"random\"`` and ``n_init=1``的``MiniBatchKMeans``一次运行结果。这种运行导致一个坏的收敛(局部最优)。\n",
53-
"\n",
54-
"用于评估的数据集是符合高斯分布的2D网格数据。"
55-
]
43+
"source": []
5644
},
5745
{
5846
"cell_type": "code",
5947
"execution_count": 3,
60-
"metadata": {
61-
"collapsed": false
62-
},
48+
"metadata": {},
6349
"outputs": [
6450
{
6551
"name": "stdout",
6652
"output_type": "stream",
6753
"text": [
68-
"Automatically created module for IPython interactive environment\nEvaluation of KMeans with k-means++ init\n"
69-
]
70-
},
71-
{
72-
"name": "stdout",
73-
"output_type": "stream",
74-
"text": [
75-
"Evaluation of KMeans with random init\n"
76-
]
77-
},
78-
{
79-
"name": "stdout",
80-
"output_type": "stream",
81-
"text": [
82-
"Evaluation of MiniBatchKMeans with k-means++ init\n"
83-
]
84-
},
85-
{
86-
"name": "stdout",
87-
"output_type": "stream",
88-
"text": [
54+
"Automatically created module for IPython interactive environment\n",
55+
"Evaluation of KMeans with k-means++ init\n",
56+
"Evaluation of KMeans with random init\n",
57+
"Evaluation of MiniBatchKMeans with k-means++ init\n",
8958
"Evaluation of MiniBatchKMeans with random init\n"
9059
]
9160
},
@@ -114,109 +83,15 @@
11483
"print(__doc__)\n",
11584
"\n",
11685
"# Author: Olivier Grisel <olivier.grisel@ensta.org>\n",
117-
"# License: BSD 3 clause\n",
118-
"\n",
119-
"import numpy as np\n",
120-
"import matplotlib.pyplot as plt\n",
121-
"import matplotlib.cm as cm\n",
122-
"\n",
123-
"from sklearn.utils import shuffle\n",
124-
"from sklearn.utils import check_random_state\n",
125-
"from sklearn.cluster import MiniBatchKMeans\n",
126-
"from sklearn.cluster import KMeans\n",
127-
"\n",
128-
"random_state = np.random.RandomState(0)\n",
129-
"\n",
130-
"# Number of run (with randomly generated dataset) for each strategy so as\n",
131-
"# to be able to compute an estimate of the standard deviation\n",
132-
"n_runs = 5\n",
133-
"\n",
134-
"# k-means models can do several random inits so as to be able to trade\n",
135-
"# CPU time for convergence robustness\n",
136-
"n_init_range = np.array([1, 5, 10, 15, 20])\n",
137-
"\n",
138-
"# Datasets generation parameters\n",
139-
"n_samples_per_center = 100\n",
140-
"grid_size = 3\n",
141-
"scale = 0.1\n",
142-
"n_clusters = grid_size ** 2\n",
143-
"\n",
144-
"\n",
145-
"def make_data(random_state, n_samples_per_center, grid_size, scale):\n",
146-
" random_state = check_random_state(random_state)\n",
147-
" centers = np.array([[i, j]\n",
148-
" for i in range(grid_size)\n",
149-
" for j in range(grid_size)])\n",
150-
" n_clusters_true, n_features = centers.shape\n",
151-
"\n",
152-
" noise = random_state.normal(\n",
153-
" scale=scale, size=(n_samples_per_center, centers.shape[1]))\n",
154-
"\n",
155-
" X = np.concatenate([c + noise for c in centers])\n",
156-
" y = np.concatenate([[i] * n_samples_per_center\n",
157-
" for i in range(n_clusters_true)])\n",
158-
" return shuffle(X, y, random_state=random_state)\n",
159-
"\n",
160-
"# Part 1: Quantitative evaluation of various init methods\n",
161-
"\n",
162-
"plt.figure()\n",
163-
"plots = []\n",
164-
"legends = []\n",
165-
"\n",
166-
"cases = [\n",
167-
" (KMeans, 'k-means++', {}),\n",
168-
" (KMeans, 'random', {}),\n",
169-
" (MiniBatchKMeans, 'k-means++', {'max_no_improvement': 3}),\n",
170-
" (MiniBatchKMeans, 'random', {'max_no_improvement': 3, 'init_size': 500}),\n",
171-
"]\n",
172-
"\n",
173-
"for factory, init, params in cases:\n",
174-
" print(\"Evaluation of %s with %s init\" % (factory.__name__, init))\n",
175-
" inertia = np.empty((len(n_init_range), n_runs))\n",
176-
"\n",
177-
" for run_id in range(n_runs):\n",
178-
" X, y = make_data(run_id, n_samples_per_center, grid_size, scale)\n",
179-
" for i, n_init in enumerate(n_init_range):\n",
180-
" km = factory(n_clusters=n_clusters, init=init, random_state=run_id,\n",
181-
" n_init=n_init, **params).fit(X)\n",
182-
" inertia[i, run_id] = km.inertia_\n",
183-
" p = plt.errorbar(n_init_range, inertia.mean(axis=1), inertia.std(axis=1))\n",
184-
" plots.append(p[0])\n",
185-
" legends.append(\"%s with %s init\" % (factory.__name__, init))\n",
186-
"\n",
187-
"plt.xlabel('n_init')\n",
188-
"plt.ylabel('inertia')\n",
189-
"plt.legend(plots, legends)\n",
190-
"plt.title(\"Mean inertia for various k-means init across %d runs\" % n_runs)\n",
191-
"\n",
192-
"# Part 2: Qualitative visual inspection of the convergence\n",
193-
"\n",
194-
"X, y = make_data(random_state, n_samples_per_center, grid_size, scale)\n",
195-
"km = MiniBatchKMeans(n_clusters=n_clusters, init='random', n_init=1,\n",
196-
" random_state=random_state).fit(X)\n",
197-
"\n",
198-
"plt.figure()\n",
199-
"for k in range(n_clusters):\n",
200-
" my_members = km.labels_ == k\n",
201-
" color = cm.nipy_spectral(float(k) / n_clusters, 1)\n",
202-
" plt.plot(X[my_members, 0], X[my_members, 1], 'o', marker='.', c=color)\n",
203-
" cluster_center = km.cluster_centers_[k]\n",
204-
" plt.plot(cluster_center[0], cluster_center[1], 'o',\n",
205-
" markerfacecolor=color, markeredgecolor='k', markersize=6)\n",
206-
" plt.title(\"Example cluster allocation with a single random init\\n\"\n",
207-
" \"with MiniBatchKMeans\")\n",
208-
"\n",
209-
"plt.show()"
86+
"# License: BSD 3 clause\n"
21087
]
21188
},
21289
{
21390
"cell_type": "code",
21491
"execution_count": null,
21592
"metadata": {},
21693
"outputs": [],
217-
"source": [
218-
""
219-
]
94+
"source": []
22095
}
22196
],
22297
"metadata": {
@@ -228,16 +103,16 @@
228103
"language_info": {
229104
"codemirror_mode": {
230105
"name": "ipython",
231-
"version": 3.0
106+
"version": 3
232107
},
233108
"file_extension": ".py",
234109
"mimetype": "text/x-python",
235110
"name": "python",
236111
"nbconvert_exporter": "python",
237112
"pygments_lexer": "ipython3",
238-
"version": "3.6.6"
113+
"version": "3.8.8"
239114
}
240115
},
241116
"nbformat": 4,
242-
"nbformat_minor": 0
117+
"nbformat_minor": 1
243118
}

‎03_data_science/03_scikit-learn/kmeans/plot_mini_batch_kmeans.ipynb‎

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": 1,
6-
"metadata": {
7-
"collapsed": false
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"%matplotlib inline"
@@ -33,24 +31,24 @@
3331
{
3432
"cell_type": "markdown",
3533
"metadata": {},
36-
"source": [
37-
"比较K-Means和MiniBatchKMeans算法\n",
38-
"\n",
39-
"结论:初始化一致的情况下,差别很小。"
40-
]
34+
"source": []
4135
},
4236
{
4337
"cell_type": "code",
4438
"execution_count": 7,
45-
"metadata": {
46-
"collapsed": false
47-
},
39+
"metadata": {},
4840
"outputs": [
4941
{
5042
"name": "stdout",
5143
"output_type": "stream",
5244
"text": [
53-
"Automatically created module for IPython interactive environment\n0\n[False True False ... True True False] [1 0 1 ... 0 0 2] 0\n1\n[ True False True ... False False False] [1 0 1 ... 0 0 2] 1\n2\n[False False False ... False False True] [1 0 1 ... 0 0 2] 2\n"
45+
"Automatically created module for IPython interactive environment\n",
46+
"0\n",
47+
"[False True False ... True True False] [1 0 1 ... 0 0 2] 0\n",
48+
"1\n",
49+
"[ True False True ... False False False] [1 0 1 ... 0 0 2] 1\n",
50+
"2\n",
51+
"[False False False ... False False True] [1 0 1 ... 0 0 2] 2\n"
5452
]
5553
},
5654
{
@@ -175,9 +173,7 @@
175173
"execution_count": null,
176174
"metadata": {},
177175
"outputs": [],
178-
"source": [
179-
""
180-
]
176+
"source": []
181177
}
182178
],
183179
"metadata": {
@@ -189,16 +185,16 @@
189185
"language_info": {
190186
"codemirror_mode": {
191187
"name": "ipython",
192-
"version": 3.0
188+
"version": 3
193189
},
194190
"file_extension": ".py",
195191
"mimetype": "text/x-python",
196192
"name": "python",
197193
"nbconvert_exporter": "python",
198194
"pygments_lexer": "ipython3",
199-
"version": "3.6.6"
195+
"version": "3.8.8"
200196
}
201197
},
202198
"nbformat": 4,
203-
"nbformat_minor": 0
199+
"nbformat_minor": 1
204200
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /