Apriori Algorithm Code in Python || Python
In this we are going to see a basic example of apriori algorithm in Python Programming Language.


  
#note: need a Apriori Dataset - Sheet1.csv file in dir

import numpy as np
import pandas as pd
from itertools import combinations

df = pd.read_csv('Apriori Dataset - Sheet1.csv')
dff = pd.DataFrame(df)
di = dff.Items.str.split(',')

suppCount = int(input("Enter the minimum support count : "))
Confidence = int(input("Enter the minimum confidence : "))

print("DATASET:")
print(df)
print()



# First set of Frequent Items L1
def validate(data, sc):
   data1 = pd.DataFrame()
   for i in range(len(data)):
       if data.iloc[i, 1] >= sc:
           temp = {'Itemsets': data.iloc[i, 0], 'supp_count': data.iloc[i, 1]}
           data1 = data1.append(temp, ignore_index=True)
   return data1


# To create combinations
def combinations1(list_of_items):
   itemsets = []
   i = 1
   for entry in list_of_items:
       proceding_items = list_of_items[i:]
       for item in proceding_items:
           if (type(item) is str):
               if entry != item:
                   tuples = (entry, item)
                   itemsets.append(tuples)
           else:
               if entry[0:-1] == item[0:-1]:
                   tuples = entry + item[1:]
                   itemsets.append(tuples)
       i = i + 1
   if (len(itemsets) == 0):
       return None
   return itemsets


# To calculate freq of Itemset
# Validate is for single item,this one for multiple
def count_itemsets(di, itemsets):
   count_itemset = {}
   for row in di:
       a = set(row)
       for itemset in itemsets:
           b = set(itemset)
           if (b.intersection(a)) == b:
               if itemset in count_itemset:
                   count_itemset[itemset] += 1
               else:
                   count_itemset[itemset] = 1
   data = pd.DataFrame()
   data['ItemSets'] = count_itemset.keys()
   data['supp_count'] = count_itemset.values()
   return data


# to print rules
def rules(Itemsets, Confidence):
   va = []  # if A->BC va is A
   vb = []  # if A->BC va is BC
   vc = []  # Confidence of above thingy
   # Above 3 only contain strong rules
   print("Rules : ")
   for l in Itemsets:
       l = frozenset(list(l))
       c = [frozenset(q) for q in combinations(l, len(l) - 1)]
       for a in c:
           # to split a set ABC into A->BC & BC->A
           b = l - a # b is The second part a is first part of arrow
           ab = l
           sab = 0  # support of ab
           sa = 0  # support of a
           sb = 0  # support of b
           for q in di:
               temp = set(q)
               if (a.issubset(temp)):
                   sa += 1
               if b.issubset(temp):
                   sb += 1
               if (ab.issubset(temp)):
                   sab += 1

           temp = sab / sa * 100
           # temp BC->A
           print(list(a), "-->>", list(b), "Confidence : ", temp)

           temp1 = sab / sb * 100  # Formula for Confidence
           # temp1 A->BC
           print(list(b), "-->>", list(a), "Confidence : ", temp1)

           if temp >= Confidence:
               va.append(list(a))
               vb.append(list(b))
               vc.append(temp)

           if (temp1 >= Confidence):
               va.append(list(b))
               vb.append(list(a))
               vc.append(temp1)

   # to print strong rules
   print()
   print("Valid Rules : ")
   for i in range(len(va)):
       print(va[i], "-->>", vb[i], "Confidence : ", vc[i])


count_item = {}
for row in di:
   for i in range(len(row)):
       if (row[i] in count_item):
           count_item[row[i]] += 1
       else:
           count_item[row[i]] = 1


data = pd.DataFrame()
data['ItemSets'] = count_item.keys()
data['supp_count'] = count_item.values()


freq = pd.DataFrame()

while (len(data) != 0):
   data = validate(data, suppCount)

   if (len(data) > 1) or (len(data) == 1 and int(data.supp_count >= suppCount)):
       freq = data
   if (len(data) != 0):
       Itemsets = combinations1(data.Itemsets)
       data = count_itemsets(di, Itemsets)

   print()
   print("ItemSet ")
   print(freq)



print()


rules(freq.Itemsets, Confidence)




#ENJOY CODING


Post a Comment

FOR ANY DOUBTS AND ERRORS FEEL FREE TO ASK. YOUR DOUBTS WILL BE ADDRESSED ASAP

Previous Post Next Post