Market Basket Analysis in Python
Isaiah Hull
Visiting Associate Professor of Finance, BI Norwegian Business School
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules


def rules_to_coordinates(rules):
    """Convert an association-rules DataFrame into plot coordinates.

    Adds three columns to ``rules`` in place -- ``antecedent``,
    ``consequent`` and ``rule`` -- and returns a DataFrame holding only
    those three columns.

    The first item of each ``antecedents`` / ``consequents`` itemset is
    used as the label, so this is intended for rules built from itemsets
    of at most two items, where each side of a rule holds a single item.
    """
    # Unpack each itemset into a plain item label.
    rules['antecedent'] = rules['antecedents'].apply(lambda items: next(iter(items)))
    rules['consequent'] = rules['consequents'].apply(lambda items: next(iter(items)))
    # The row index doubles as a unique identifier for each rule.
    rules['rule'] = rules.index
    return rules[['antecedent', 'consequent', 'rule']]


# Load the one-hot encoded data
onehot = pd.read_csv('datasets/movies_onehot.csv')

# Generate frequent itemsets (at most two items per itemset)
frequent_itemsets = apriori(onehot, min_support=0.10, use_colnames=True, max_len=2)

# Generate association rules; a support threshold of 0.00 keeps every rule
rules = association_rules(frequent_itemsets, metric='support', min_threshold=0.00)

# Convert rules to coordinates: antecedent and consequent labels plus a rule id
coords = rules_to_coordinates(rules)

# Print example
print(coords.head(1))
                antecedent        consequent  rule
0  Dark Knight, The (2008)  Inception (2010)     0
from pandas.plotting import parallel_coordinates

# Plot the coordinates, using the 'rule' column as the class column
# and the 'ocean' colormap for the line colours.
parallel_coordinates(frame=coords, class_column='rule', colormap='ocean')
# Recompute frequent itemsets with a lower support threshold (0.01),
# still limited to at most two items per itemset.
frequent_itemsets = apriori(
    onehot,
    min_support=0.01,
    use_colnames=True,
    max_len=2,
)

# Keep only the rules whose lift is at least 1.00.
rules = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1.00,
)

# Convert the rules to plot coordinates.
coords = rules_to_coordinates(rules)

# Draw the parallel coordinates plot with 'rule' as the class column.
parallel_coordinates(coords, 'rule')
Market Basket Analysis in Python