Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 793a5eb

Browse files
Created using Colaboratory
1 parent aa25ee4 commit 793a5eb

File tree

1 file changed

+39
-25
lines changed

1 file changed

+39
-25
lines changed

‎09_Hierarchical_Clustering.ipynb

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"name": "09 Hierarchical Clustering.ipynb",
77
"provenance": [],
88
"collapsed_sections": [],
9-
"authorship_tag": "ABX9TyPnLpjY7aB4VdaauAzHSYHL",
9+
"authorship_tag": "ABX9TyOcNJnzlGscARY2g2fgeEkQ",
1010
"include_colab_link": true
1111
},
1212
"kernelspec": {
@@ -36,7 +36,15 @@
3636
"# Hierarchical Clustering \n",
3737
"\n",
3838
"# Agglomerative Clustering is a method of clustering \n",
39-
"# which builds a hierarchy of clusters by merging together small clusters"
39+
"# which builds a hierarchy of clusters by merging together small clusters\n",
40+
"\n",
41+
"# Silhouette Score\n",
42+
"# The closer the silhouette score is to 1, the better separated the clusters are\n",
43+
"# The Silhouette Coefficient is a metric used to evaluate the quality of a clustering result \n",
44+
"# Its value ranges from -1 to 1.\n",
45+
"# 1: Means clusters are well apart from each other and clearly distinguished.\n",
46+
"# 0: Means clusters overlap, i.e. the distance between clusters is not significant.\n",
47+
"# -1: Means samples have been assigned to the wrong clusters."
4048
],
4149
"execution_count": 1,
4250
"outputs": []
@@ -56,7 +64,8 @@
5664
"from sklearn.cluster import AgglomerativeClustering\n",
5765
"from sklearn.datasets import load_iris\n",
5866
"import numpy as np\n",
59-
"import pandas as pd"
67+
"import pandas as pd\n",
68+
"from sklearn.metrics import silhouette_score"
6069
],
6170
"execution_count": 2,
6271
"outputs": []
@@ -70,7 +79,7 @@
7079
"base_uri": "https://localhost:8080/",
7180
"height": 204
7281
},
73-
"outputId": "b160b91f-2581-4d91-d255-86e0c327d994"
82+
"outputId": "0e1ad085-0af3-4ea2-b6c9-11c4a8bafb6c"
7483
},
7584
"source": [
7685
"# Loading the Dataset\n",
@@ -184,7 +193,7 @@
184193
"base_uri": "https://localhost:8080/",
185194
"height": 107
186195
},
187-
"outputId": "edba7d29-6f17-45ef-8198-686fb3cf55a9"
196+
"outputId": "cc3304da-9a95-4ff0-9361-29866fb1f068"
188197
},
189198
"source": [
190199
"# Preparing Data\n",
@@ -221,7 +230,7 @@
221230
"base_uri": "https://localhost:8080/",
222231
"height": 125
223232
},
224-
"outputId": "dbd5dec4-fecf-4888-9854-ceb6d4102fc1"
233+
"outputId": "7d690fac-4ee5-4b4b-d187-18c53fc805b8"
225234
},
226235
"source": [
227236
"# Now we will separate the target variable from the original dataset \n",
@@ -270,7 +279,7 @@
270279
"base_uri": "https://localhost:8080/",
271280
"height": 395
272281
},
273-
"outputId": "face4433-b7bb-438f-c25e-3b76bfdf7e38"
282+
"outputId": "6d6a1938-d925-4fc0-a686-0a8d1a3a8720"
274283
},
275284
"source": [
276285
"# Filtering Setosa\n",
@@ -324,7 +333,7 @@
324333
"base_uri": "https://localhost:8080/",
325334
"height": 485
326335
},
327-
"outputId": "57852746-b1cf-44af-fdc7-3e4a9719fc93"
336+
"outputId": "083498f6-b5bc-4968-8cbb-a7c20cbeab12"
328337
},
329338
"source": [
330339
"# Filtering Setosa for 2D Plot \n",
@@ -397,7 +406,7 @@
397406
"base_uri": "https://localhost:8080/",
398407
"height": 395
399408
},
400-
"outputId": "075f81a4-595c-4c0c-e8b7-31721acb2488"
409+
"outputId": "e58b70b5-3623-44dd-fe31-417d81bb5275"
401410
},
402411
"source": [
403412
"# Filtering Versicolour\n",
@@ -451,7 +460,7 @@
451460
"base_uri": "https://localhost:8080/",
452461
"height": 485
453462
},
454-
"outputId": "8d22f3ae-c703-4303-cc75-c55cf5b264a6"
463+
"outputId": "483a8bf5-ae7f-4951-caa2-063cbbbc9cee"
455464
},
456465
"source": [
457466
"# Filtering Versicolour for 2D Plot \n",
@@ -523,7 +532,7 @@
523532
"base_uri": "https://localhost:8080/",
524533
"height": 395
525534
},
526-
"outputId": "cfed997b-a096-4ebb-c2b3-05edfafa362c"
535+
"outputId": "79cbabbb-0739-45b8-fd36-9a2d71bda44b"
527536
},
528537
"source": [
529538
"# Filtering Virginica\n",
@@ -577,7 +586,7 @@
577586
"base_uri": "https://localhost:8080/",
578587
"height": 485
579588
},
580-
"outputId": "ad81832b-49ce-4768-b791-71f0f34ae15f"
589+
"outputId": "66e8d27b-86aa-4c81-9a11-27e28117b09d"
581590
},
582591
"source": [
583592
"# Filtering Virginica for 2D Plot\n",
@@ -648,7 +657,7 @@
648657
"base_uri": "https://localhost:8080/",
649658
"height": 286
650659
},
651-
"outputId": "c1f60208-731a-425e-9286-d06b52ba9777"
660+
"outputId": "d2bd50fd-12dd-48b8-aacd-c51aba490576"
652661
},
653662
"source": [
654663
"# Visualise Classes all at once\n",
@@ -675,7 +684,7 @@
675684
"output_type": "execute_result",
676685
"data": {
677686
"text/plain": [
678-
"<matplotlib.legend.Legend at 0x7f27d345e470>"
687+
"<matplotlib.legend.Legend at 0x7ffa7d6145f8>"
679688
]
680689
},
681690
"metadata": {
@@ -707,7 +716,7 @@
707716
"base_uri": "https://localhost:8080/",
708717
"height": 204
709718
},
710-
"outputId": "be9c9cac-ac1f-4696-89d0-eed6c4c2b764"
719+
"outputId": "d2a9b743-a4df-4e58-9299-6ffb7acfab9e"
711720
},
712721
"source": [
713722
"# Plotting of Dendrogram\n",
@@ -817,7 +826,7 @@
817826
"base_uri": "https://localhost:8080/",
818827
"height": 395
819828
},
820-
"outputId": "db224ba6-bde1-4c05-ea6b-b6f32c907a80"
829+
"outputId": "06710af3-fec8-4c5c-9c12-80a2c04c4a99"
821830
},
822831
"source": [
823832
"# We finally plot a Dendrogram \n",
@@ -869,7 +878,7 @@
869878
"base_uri": "https://localhost:8080/",
870879
"height": 392
871880
},
872-
"outputId": "63f4452f-5967-453c-9fad-432b744367a1"
881+
"outputId": "bfc6af2f-4431-43b1-dfb7-ca24c0715e2b"
873882
},
874883
"source": [
875884
"# Single Linkage - Nearest Point\n",
@@ -919,7 +928,7 @@
919928
"base_uri": "https://localhost:8080/",
920929
"height": 395
921930
},
922-
"outputId": "de9efac7-202b-4b5c-8f36-55b476c8b14c"
931+
"outputId": "8445cacc-3dc7-43ec-9f59-7d7b5960bc80"
923932
},
924933
"source": [
925934
"# Complete Linkage - Farthest Point \n",
@@ -969,7 +978,7 @@
969978
"base_uri": "https://localhost:8080/",
970979
"height": 392
971980
},
972-
"outputId": "7e098ef1-7d30-4d24-f784-126bf6f3f82e"
981+
"outputId": "567b5a53-d4d9-43a6-b364-7c3c0331d5f2"
973982
},
974983
"source": [
975984
"# Average Linkage - Average Distance between all points\n",
@@ -1019,7 +1028,7 @@
10191028
"base_uri": "https://localhost:8080/",
10201029
"height": 71
10211030
},
1022-
"outputId": "6b697d9a-f7ba-43e7-b25f-31144f3be908"
1031+
"outputId": "4440bfae-dd1d-435e-d414-f7a8952b7c82"
10231032
},
10241033
"source": [
10251034
"# Building an Agglomerative Clustering Model\n",
@@ -1057,7 +1066,7 @@
10571066
"base_uri": "https://localhost:8080/",
10581067
"height": 71
10591068
},
1060-
"outputId": "a2336119-c81d-41c2-ed90-0728b8e97d2f"
1069+
"outputId": "d3c38bfd-fc5d-4c4f-83a8-855e3daf9d9f"
10611070
},
10621071
"source": [
10631072
"# Fitting Model\n",
@@ -1095,7 +1104,7 @@
10951104
"base_uri": "https://localhost:8080/",
10961105
"height": 143
10971106
},
1098-
"outputId": "12a9c91b-5ecd-4d1c-bddd-710622ede31e"
1107+
"outputId": "425d233f-d1c6-4c05-9e5d-f2bf90e5b941"
10991108
},
11001109
"source": [
11011110
"# Predicting Output Class\n",
@@ -1131,9 +1140,9 @@
11311140
"colab_type": "code",
11321141
"colab": {
11331142
"base_uri": "https://localhost:8080/",
1134-
"height": 394
1143+
"height": 430
11351144
},
1136-
"outputId": "3b95e094-51d6-455f-ba63-1ea4d194f9fc"
1145+
"outputId": "c142809b-4d35-4c4d-aa6c-8575e940389f"
11371146
},
11381147
"source": [
11391148
"# Visualizing Output\n",
@@ -1144,6 +1153,9 @@
11441153
"unique, counts = np.unique(pred1, return_counts=True)\n",
11451154
"print('Hierarchical Clustering Output Cluster')\n",
11461155
"print(dict(zip(unique, counts)))\n",
1156+
"# Silhouette Score\n",
1157+
"print('Silhouette Score for 3 Clusters')\n",
1158+
"print(silhouette_score(iris_X,pred1))\n",
11471159
"print('\\n')\n",
11481160
"\n",
11491161
"# In the above output we got value labels: ‘0’, ‘1’ and ‘2’\n",
@@ -1176,6 +1188,8 @@
11761188
"{0: 50, 1: 50, 2: 50}\n",
11771189
"Hierarchical Clustering Output Cluster\n",
11781190
"{0: 64, 1: 50, 2: 36}\n",
1191+
"Silhouette Score for 3 Clusters\n",
1192+
"0.5543236611296415\n",
11791193
"\n",
11801194
"\n"
11811195
],
@@ -1185,7 +1199,7 @@
11851199
"output_type": "execute_result",
11861200
"data": {
11871201
"text/plain": [
1188-
"<matplotlib.legend.Legend at 0x7f27cffcdb70>"
1202+
"<matplotlib.legend.Legend at 0x7ffa7a180d30>"
11891203
]
11901204
},
11911205
"metadata": {

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /