1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "name" : " eclat.ipynb" ,
7
+ "provenance" : [],
8
+ "collapsed_sections" : [],
9
+ "toc_visible" : true
10
+ },
11
+ "kernelspec" : {
12
+ "name" : " python3" ,
13
+ "display_name" : " Python 3"
14
+ }
15
+ },
16
+ "cells" : [
17
+ {
18
+ "cell_type" : " markdown" ,
19
+ "metadata" : {
20
+ "id" : " 1fziHl7Ar94J"
21
+ },
22
+ "source" : [
23
+ " # Eclat"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type" : " markdown" ,
28
+ "metadata" : {
29
+ "id" : " eiNwni1xsEgT"
30
+ },
31
+ "source" : [
32
+ " ## Importing the libraries"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type" : " code" ,
37
+ "metadata" : {
38
+ "id" : " DUF77Qr1vqyM" ,
39
+ "outputId" : " 34b8aa55-2acb-4995-fd1a-681a5f38fbc4" ,
40
+ "colab" : {
41
+ "base_uri" : " https://localhost:8080/" ,
42
+ "height" : 188
43
+ }
44
+ },
45
+ "source" : [
46
+ " %pip install apyori==1.1.2"
47
+ ],
48
+ "execution_count" : 1 ,
49
+ "outputs" : [
50
+ {
51
+ "output_type" : " stream" ,
52
+ "text" : [
53
+ " Collecting apyori\n " ,
54
+ " Downloading https://files.pythonhosted.org/packages/5e/62/5ffde5c473ea4b033490617ec5caa80d59804875ad3c3c57c0976533a21a/apyori-1.1.2.tar.gz\n " ,
55
+ " Building wheels for collected packages: apyori\n " ,
56
+ " Building wheel for apyori (setup.py) ... \u001b [?25l\u001b [?25hdone\n " ,
57
+ " Created wheel for apyori: filename=apyori-1.1.2-cp36-none-any.whl size=5975 sha256=8f53091a20bda1c225ea3b1f2e0b370cda3bbd00dbc94685b752fa3c2f044d26\n " ,
58
+ " Stored in directory: /root/.cache/pip/wheels/5d/92/bb/474bbadbc8c0062b9eb168f69982a0443263f8ab1711a8cad0\n " ,
59
+ " Successfully built apyori\n " ,
60
+ " Installing collected packages: apyori\n " ,
61
+ " Successfully installed apyori-1.1.2\n "
62
+ ],
63
+ "name" : " stdout"
64
+ }
65
+ ]
66
+ },
67
+ {
68
+ "cell_type" : " code" ,
69
+ "metadata" : {
70
+ "id" : " UJfitBClsJlT"
71
+ },
72
+ "source" : [
73
+ " import numpy as np\n " ,
74
+ " import matplotlib.pyplot as plt\n " ,
75
+ " import pandas as pd"
76
+ ],
77
+ "execution_count" : 2 ,
78
+ "outputs" : []
79
+ },
80
+ {
81
+ "cell_type" : " markdown" ,
82
+ "metadata" : {
83
+ "id" : " vLt-7XUKsXBd"
84
+ },
85
+ "source" : [
86
+ " ## Data Preprocessing"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type" : " code" ,
91
+ "metadata" : {
92
+ "id" : " J_A-UFOAsaDf"
93
+ },
94
+ "source" : [
95
+ " dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)\n " ,
96
+ " # One basket (list of product names) per CSV row, sized from the data instead of a hard-coded 7501 x 20.\n " ,
97
+ " # Cells that pandas read as missing (NaN) are skipped so the string 'nan' never becomes a product.\n " ,
98
+ " transactions = [[str(item) for item in row if pd.notna(item)] for row in dataset.values]"
99
+ ],
100
+ "execution_count" : 4 ,
101
+ "outputs" : []
102
+ },
103
+ {
104
+ "cell_type" : " markdown" ,
105
+ "metadata" : {
106
+ "id" : " 1wYZdBd5sea_"
107
+ },
108
+ "source" : [
109
+ " ## Training the Eclat model on the dataset"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type" : " code" ,
114
+ "metadata" : {
115
+ "id" : " YzIk4vXZsj5i"
116
+ },
117
+ "source" : [
118
+ " from apyori import apriori\n " ,
119
+ " rules = apriori(transactions, min_support=0.003, min_confidence=0.2, min_lift=3, min_length=2, max_length=2)"
120
+ ],
121
+ "execution_count" : 5 ,
122
+ "outputs" : []
123
+ },
124
+ {
125
+ "cell_type" : " markdown" ,
126
+ "metadata" : {
127
+ "id" : " b176YNwWspiO"
128
+ },
129
+ "source" : [
130
+ " ## Visualising the results"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type" : " markdown" ,
135
+ "metadata" : {
136
+ "id" : " iO6bF_dImT-E"
137
+ },
138
+ "source" : [
139
+ " ### Displaying the raw results returned by the apriori function"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type" : " code" ,
144
+ "metadata" : {
145
+ "id" : " kvF-sLc6ifhd"
146
+ },
147
+ "source" : [
148
+ " results = list(rules)"
149
+ ],
150
+ "execution_count" : 6 ,
151
+ "outputs" : []
152
+ },
153
+ {
154
+ "cell_type" : " code" ,
155
+ "metadata" : {
156
+ "id" : " eAD8Co4_l9IE" ,
157
+ "outputId" : " 2f6d5b69-9be2-454b-ac7e-728a15dab23e" ,
158
+ "colab" : {
159
+ "base_uri" : " https://localhost:8080/" ,
160
+ "height" : 188
161
+ }
162
+ },
163
+ "source" : [
164
+ " results"
165
+ ],
166
+ "execution_count" : 7 ,
167
+ "outputs" : [
168
+ {
169
+ "output_type" : " execute_result" ,
170
+ "data" : {
171
+ "text/plain" : [
172
+ " [RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),\n " ,
173
+ " RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),\n " ,
174
+ " RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),\n " ,
175
+ " RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0.2450980392156863, lift=5.164270764485569)]),\n " ,
176
+ " RelationRecord(items=frozenset({'herb & pepper', 'ground beef'}), support=0.015997866951073192, ordered_statistics=[OrderedStatistic(items_base=frozenset({'herb & pepper'}), items_add=frozenset({'ground beef'}), confidence=0.3234501347708895, lift=3.2919938411349285)]),\n " ,
177
+ " RelationRecord(items=frozenset({'ground beef', 'tomato sauce'}), support=0.005332622317024397, ordered_statistics=[OrderedStatistic(items_base=frozenset({'tomato sauce'}), items_add=frozenset({'ground beef'}), confidence=0.3773584905660377, lift=3.840659481324083)]),\n " ,
178
+ " RelationRecord(items=frozenset({'olive oil', 'light cream'}), support=0.003199573390214638, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'olive oil'}), confidence=0.20512820512820515, lift=3.1147098515519573)]),\n " ,
179
+ " RelationRecord(items=frozenset({'olive oil', 'whole wheat pasta'}), support=0.007998933475536596, ordered_statistics=[OrderedStatistic(items_base=frozenset({'whole wheat pasta'}), items_add=frozenset({'olive oil'}), confidence=0.2714932126696833, lift=4.122410097642296)]),\n " ,
180
+ " RelationRecord(items=frozenset({'shrimp', 'pasta'}), support=0.005065991201173177, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'shrimp'}), confidence=0.3220338983050847, lift=4.506672147735896)])]"
181
+ ]
182
+ },
183
+ "metadata" : {
184
+ "tags" : []
185
+ },
186
+ "execution_count" : 7
187
+ }
188
+ ]
189
+ },
190
+ {
191
+ "cell_type" : " markdown" ,
192
+ "metadata" : {
193
+ "id" : " MFkQP-fcjDBC"
194
+ },
195
+ "source" : [
196
+ " ### Organising the results into a Pandas DataFrame"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type" : " code" ,
201
+ "metadata" : {
202
+ "id" : " gyq7Poi0mMUe"
203
+ },
204
+ "source" : [
205
+ " def inspect(results):\n " ,
206
+ "     \"\"\"Return one (product 1, product 2, support) tuple per record, reading `results` in a single pass.\"\"\"\n " ,
207
+ "     # record[1] is the support; record[2][0] is the first ordered statistic, whose [0] and [1] are the base and added item sets.\n " ,
208
+ "     return [(tuple(record[2][0][0])[0], tuple(record[2][0][1])[0], record[1]) for record in results]\n " ,
209
+ " \n " ,
210
+ " resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Product 1', 'Product 2', 'Support'])"
211
+ ],
212
+ "execution_count" : 8 ,
213
+ "outputs" : []
214
+ },
215
+ {
216
+ "cell_type" : " markdown" ,
217
+ "metadata" : {
218
+ "id" : " IjrrlYW4jpTR"
219
+ },
220
+ "source" : [
221
+ " ### Displaying the results sorted by descending support"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type" : " code" ,
226
+ "metadata" : {
227
+ "id" : " nI7DJXng-nxQ" ,
228
+ "outputId" : " bbbe4321-f112-4e81-ad32-7c7197d96891" ,
229
+ "colab" : {
230
+ "base_uri" : " https://localhost:8080/" ,
231
+ "height" : 314
232
+ }
233
+ },
234
+ "source" : [
235
+ " resultsinDataFrame.nlargest(n = 10, columns = 'Support')"
236
+ ],
237
+ "execution_count" : 9 ,
238
+ "outputs" : [
239
+ {
240
+ "output_type" : " execute_result" ,
241
+ "data" : {
242
+ "text/html" : [
243
+ " <div>\n " ,
244
+ " <style scoped>\n " ,
245
+ " .dataframe tbody tr th:only-of-type {\n " ,
246
+ " vertical-align: middle;\n " ,
247
+ " }\n " ,
248
+ " \n " ,
249
+ " .dataframe tbody tr th {\n " ,
250
+ " vertical-align: top;\n " ,
251
+ " }\n " ,
252
+ " \n " ,
253
+ " .dataframe thead th {\n " ,
254
+ " text-align: right;\n " ,
255
+ " }\n " ,
256
+ " </style>\n " ,
257
+ " <table border=\" 1\" class=\" dataframe\" >\n " ,
258
+ " <thead>\n " ,
259
+ " <tr style=\" text-align: right;\" >\n " ,
260
+ " <th></th>\n " ,
261
+ " <th>Product 1</th>\n " ,
262
+ " <th>Product 2</th>\n " ,
263
+ " <th>Support</th>\n " ,
264
+ " </tr>\n " ,
265
+ " </thead>\n " ,
266
+ " <tbody>\n " ,
267
+ " <tr>\n " ,
268
+ " <th>4</th>\n " ,
269
+ " <td>herb &amp; pepper</td>\n " ,
270
+ " <td>ground beef</td>\n " ,
271
+ " <td>0.015998</td>\n " ,
272
+ " </tr>\n " ,
273
+ " <tr>\n " ,
274
+ " <th>7</th>\n " ,
275
+ " <td>whole wheat pasta</td>\n " ,
276
+ " <td>olive oil</td>\n " ,
277
+ " <td>0.007999</td>\n " ,
278
+ " </tr>\n " ,
279
+ " <tr>\n " ,
280
+ " <th>2</th>\n " ,
281
+ " <td>pasta</td>\n " ,
282
+ " <td>escalope</td>\n " ,
283
+ " <td>0.005866</td>\n " ,
284
+ " </tr>\n " ,
285
+ " <tr>\n " ,
286
+ " <th>1</th>\n " ,
287
+ " <td>mushroom cream sauce</td>\n " ,
288
+ " <td>escalope</td>\n " ,
289
+ " <td>0.005733</td>\n " ,
290
+ " </tr>\n " ,
291
+ " <tr>\n " ,
292
+ " <th>5</th>\n " ,
293
+ " <td>tomato sauce</td>\n " ,
294
+ " <td>ground beef</td>\n " ,
295
+ " <td>0.005333</td>\n " ,
296
+ " </tr>\n " ,
297
+ " <tr>\n " ,
298
+ " <th>8</th>\n " ,
299
+ " <td>pasta</td>\n " ,
300
+ " <td>shrimp</td>\n " ,
301
+ " <td>0.005066</td>\n " ,
302
+ " </tr>\n " ,
303
+ " <tr>\n " ,
304
+ " <th>0</th>\n " ,
305
+ " <td>light cream</td>\n " ,
306
+ " <td>chicken</td>\n " ,
307
+ " <td>0.004533</td>\n " ,
308
+ " </tr>\n " ,
309
+ " <tr>\n " ,
310
+ " <th>3</th>\n " ,
311
+ " <td>fromage blanc</td>\n " ,
312
+ " <td>honey</td>\n " ,
313
+ " <td>0.003333</td>\n " ,
314
+ " </tr>\n " ,
315
+ " <tr>\n " ,
316
+ " <th>6</th>\n " ,
317
+ " <td>light cream</td>\n " ,
318
+ " <td>olive oil</td>\n " ,
319
+ " <td>0.003200</td>\n " ,
320
+ " </tr>\n " ,
321
+ " </tbody>\n " ,
322
+ " </table>\n " ,
323
+ " </div>"
324
+ ],
325
+ "text/plain" : [
326
+ " Product 1 Product 2 Support\n " ,
327
+ " 4 herb & pepper ground beef 0.015998\n " ,
328
+ " 7 whole wheat pasta olive oil 0.007999\n " ,
329
+ " 2 pasta escalope 0.005866\n " ,
330
+ " 1 mushroom cream sauce escalope 0.005733\n " ,
331
+ " 5 tomato sauce ground beef 0.005333\n " ,
332
+ " 8 pasta shrimp 0.005066\n " ,
333
+ " 0 light cream chicken 0.004533\n " ,
334
+ " 3 fromage blanc honey 0.003333\n " ,
335
+ " 6 light cream olive oil 0.003200"
336
+ ]
337
+ },
338
+ "metadata" : {
339
+ "tags" : []
340
+ },
341
+ "execution_count" : 9
342
+ }
343
+ ]
344
+ }
345
+ ]
346
+ }
0 commit comments