5
5
- Frequent itemsets
6
6
- Association rules with minimum confidence and lift
7
7
8
- WIKI:https://en.wikipedia.org/wiki/Apriori_algorithm
8
+ WIKI:https://en.wikipedia.org/wiki/Apriori_algorithm
9
9
"""
10
10
11
11
from collections import defaultdict
12
12
from itertools import combinations
13
+ from typing import List , Dict , Tuple , Set
13
14
14
def load_data() -> list[list[str]]:
    """
    Return a small sample transaction dataset.

    Each inner list is one transaction (a basket of item names). A new
    nested list is built on every call, so callers may mutate the result
    without affecting later calls.

    >>> data = load_data()
    >>> len(data)
    4
    >>> 'milk' in data[0]
    True
    """
    return [["milk"], ["milk", "butter"], ["milk", "bread"], ["milk", "bread", "chips"]]
22
27
23
28
24
29
class Apriori :
25
30
"""Apriori algorithm class with support, confidence, and lift filtering."""
26
31
27
- def __init__ (self , transactions , min_support = 0.25 , min_confidence = 0.5 , min_lift = 1.0 ):
28
- self .transactions = [set (t ) for t in transactions ]
29
- self .min_support = min_support
30
- self .min_confidence = min_confidence
31
- self .min_lift = min_lift
32
- self .itemsets = []
33
- self .rules = []
32
+ def __init__ (
33
+ self ,
34
+ transactions : List [List [str ]],
35
+ min_support : float = 0.25 ,
36
+ min_confidence : float = 0.5 ,
37
+ min_lift : float = 1.0 ,
38
+ ) -> None :
39
+ self .transactions : List [Set [str ]] = [set (t ) for t in transactions ]
40
+ self .min_support : float = min_support
41
+ self .min_confidence : float = min_confidence
42
+ self .min_lift : float = min_lift
43
+ self .itemsets : List [Dict [frozenset , float ]] = []
44
+ self .rules : List [Tuple [frozenset , frozenset , float , float ]] = []
34
45
35
46
self .find_frequent_itemsets ()
36
47
self .generate_association_rules ()
37
48
38
49
def _get_support (self , itemset : frozenset ) -> float :
39
50
"""Return support of an itemset."""
40
- return sum (1 for t in self .transactions if itemset .issubset (t )) / len (self .transactions )
51
+ return sum (1 for t in self .transactions if itemset .issubset (t )) / len (
52
+ self .transactions
53
+ )
41
54
42
55
def confidence(self, antecedent: frozenset, consequent: frozenset) -> float:
    """
    Calculate the confidence of the rule antecedent -> consequent.

    Confidence is support(antecedent | consequent) / support(antecedent).

    Returns:
        The confidence as a float, or 0.0 when the antecedent has zero
        support (the ratio would otherwise be undefined).
    """
    support_antecedent = self._get_support(antecedent)
    if support_antecedent > 0:
        # The joint support is only needed once the denominator is usable.
        return self._get_support(antecedent | consequent) / support_antecedent
    return 0.0
47
60
48
61
def lift(self, antecedent: frozenset, consequent: frozenset) -> float:
    """
    Calculate the lift of the rule antecedent -> consequent.

    Lift is confidence(antecedent -> consequent) / support(consequent).

    Returns:
        The lift as a float, or 0.0 when the consequent has zero support
        (the ratio would otherwise be undefined).
    """
    support_consequent = self._get_support(consequent)
    if support_consequent > 0:
        # Confidence is only needed once the denominator is usable.
        return self.confidence(antecedent, consequent) / support_consequent
    return 0.0
53
66
54
- def find_frequent_itemsets (self ):
67
+ def find_frequent_itemsets (self )-> List [ Dict [ frozenset , float ]] :
55
68
"""Generate all frequent itemsets."""
56
- item_counts = defaultdict (int )
69
+ item_counts : Dict [ frozenset , int ] = defaultdict (int )
57
70
for t in self .transactions :
58
71
for item in t :
59
72
item_counts [frozenset ([item ])] += 1
60
73
61
- total = len (self .transactions )
62
- current_itemsets = {k : v / total for k , v in item_counts .items () if v / total >= self .min_support }
63
- self .itemsets .append (current_itemsets )
74
+ total : int = len (self .transactions )
75
+ current_itemsets : Dict [frozenset , float ] = {
76
+ k : v / total for k , v in item_counts .items () if v / total >= self .min_support
77
+ }
78
+ if current_itemsets :
79
+ self .itemsets .append (current_itemsets )
64
80
65
- k = 2
81
+ k : int = 2
66
82
while current_itemsets :
67
- candidates = set ()
68
- keys = list (current_itemsets .keys ())
83
+ candidates : Set [ frozenset ] = set ()
84
+ keys : List [ frozenset ] = list (current_itemsets .keys ())
69
85
for i in range (len (keys )):
70
86
for j in range (i + 1 , len (keys )):
71
87
union = keys [i ] | keys [j ]
72
- if len (union ) == k and all (frozenset (sub ) in current_itemsets for sub in combinations (union , k - 1 )):
73
- candidates .add (union )
74
-
75
- freq_candidates = {c : self ._get_support (c ) for c in candidates if self ._get_support (c ) >= self .min_support }
88
+ if len (union ) == k and all (
89
+ frozenset (sub ) in current_itemsets
90
+ for sub in combinations (union , k - 1 )
91
+ ):
92
+ candidates .add (union )
93
+
94
+ freq_candidates : Dict [frozenset , float ] = {
95
+ c : self ._get_support (c ) for c in candidates if self ._get_support (c ) >= self .min_support
96
+ }
76
97
if not freq_candidates :
77
98
break
78
99
@@ -82,20 +103,26 @@ def find_frequent_itemsets(self):
82
103
83
104
return self .itemsets
84
105
85
def generate_association_rules(self) -> list[tuple[frozenset, frozenset, float, float]]:
    """
    Generate association rules that meet the confidence and lift thresholds.

    Every itemset in ``self.itemsets`` with at least two items is split
    into each possible non-empty antecedent and the complementary
    consequent. A rule is kept when its confidence is at least
    ``self.min_confidence`` and its lift is at least ``self.min_lift``.

    Returns:
        ``self.rules``: the list of (antecedent, consequent, confidence,
        lift) tuples. Rules already in the list are not appended again, so
        calling this method repeatedly does not create duplicates.
    """
    # Rules may already be present (the constructor runs this method once);
    # a set gives O(1) duplicate checks instead of scanning the list.
    seen = set(self.rules)
    for level in self.itemsets:
        for itemset in level:
            if len(itemset) < 2:
                # A rule needs at least one item on each side.
                continue
            for size in range(1, len(itemset)):
                for antecedent in combinations(itemset, size):
                    antecedent_set = frozenset(antecedent)
                    consequent_set = itemset - antecedent_set
                    conf = self.confidence(antecedent_set, consequent_set)
                    if conf < self.min_confidence:
                        # Skip the lift computation for rules already rejected.
                        continue
                    lft = self.lift(antecedent_set, consequent_set)
                    if lft < self.min_lift:
                        continue
                    rule = (antecedent_set, consequent_set, conf, lft)
                    if rule not in seen:
                        seen.add(rule)
                        self.rules.append(rule)
    return self.rules
100
127
101
128
@@ -104,8 +131,10 @@ def generate_association_rules(self):
104
131
105
132
doctest .testmod ()
106
133
107
- transactions = load_data ()
108
- model = Apriori (transactions , min_support = 0.25 , min_confidence = 0.1 , min_lift = 0.0 )
134
+ transactions : List [List [str ]] = load_data ()
135
+ model : Apriori = Apriori (
136
+ transactions , min_support = 0.25 , min_confidence = 0.1 , min_lift = 0.0
137
+ )
109
138
110
139
print ("Frequent itemsets:" )
111
140
for level in model .itemsets :
@@ -114,7 +143,8 @@ def generate_association_rules(self):
114
143
115
144
print ("\n Association Rules:" )
116
145
for rule in model .rules :
117
- antecedent , consequent , conf , lift = rule
118
- print (f"{ set (antecedent )} -> { set (consequent )} , conf={ conf :.2f} , lift={ lift :.2f} " )
119
-
120
-
146
+ antecedent , consequent , conf , lift_value = rule
147
+ print (
148
+ f"{ set (antecedent )} -> { set (consequent )} , "
149
+ f"conf={ conf :.2f} , lift={ lift_value :.2f} "
150
+ )
0 commit comments