To implement frequent pattern mining using Spark with Python
Set up Spark Context and Spark session
Load the dataset
Convert the pandas DataFrame to a Spark DataFrame
Create the frequent pattern mining (FP-Growth) model and fit it
Find the frequent item sets
Generate the association rules
Examine the input items against all the association rules and summarize the consequents as the prediction
Write the output to an Excel file
#import necessary libraries
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.ml.fpm import FPGrowth
# Set up the SparkSession; getOrCreate() reuses an already-running session
# if there is one, otherwise it starts a new one.
# The string literals use plain ASCII quotes: typographic ("curly") quotes
# are not valid string delimiters in Python and raise a SyntaxError.
# NOTE(review): the app name says "regression" although this script does
# frequent pattern mining; it is left unchanged so the job name shown by
# Spark stays the same.
spark = (
    SparkSession.builder
    .appName("Python spark regression example")
    # Placeholder key/value carried over as-is; presumably has no effect on
    # the job - TODO confirm and remove if unused.
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)
# Read the file.
# NOTE: the path is a placeholder (the dots elide the real directory);
# point it at the actual workbook before running.
data = pd.read_excel('/home/……/GsData.xlsx')

# Build one transaction (basket) per spreadsheet row from the row's
# non-missing cells. Rows are collected in a single pass instead of growing a
# DataFrame with DataFrame.append inside a loop: that method was removed in
# pandas 2.0 and copied the whole frame on every iteration.
# dict.fromkeys() drops repeated values within a row while keeping their
# first-seen order, because Spark's FP-Growth rejects transactions that
# contain duplicate items.
transactions = [
    list(dict.fromkeys(data.iloc[row_pos].dropna()))
    for row_pos in range(len(data))
]

# Fail early with a clear message; an empty sheet would otherwise surface
# later as an opaque schema-inference error from Spark.
if not transactions:
    raise ValueError("The input workbook contains no rows to mine.")

# Keep the intermediate pandas frame: 'index' is the row position and 'val'
# holds that row's list of items.
Final_df = pd.DataFrame(
    {'index': range(len(transactions)), 'val': transactions}
)

# Convert to a Spark DataFrame with an integer "id" (the row position) and an
# array column "items", which is the input FPGrowth is configured to read.
df = spark.createDataFrame(
    list(enumerate(Final_df['val'])),
    ["id", "items"],
)
# Fit the frequent pattern mining (FP-Growth) model.
# minSupport / minConfidence are the thresholds passed to Spark's FPGrowth for
# keeping frequent itemsets and association rules respectively.
# (ASCII quotes are required here; curly quotes are a SyntaxError.)
fpGrowth = FPGrowth(itemsCol="items", minSupport=0.2, minConfidence=0.3)
model = fpGrowth.fit(df)

# Display frequent itemsets.
model.freqItemsets.show()

# Display generated association rules and keep a pandas copy for export.
Ar_op = model.associationRules
Ar_op.show()
Ar_df = Ar_op.toPandas()

# transform examines the input items against all the association rules and
# summarizes the consequents as the prediction; keep a pandas copy for export.
Tr_op = model.transform(df)
Tr_op.show()
Tr_df = Tr_op.toPandas()
# Write both results to a single workbook, one sheet each. The context
# manager saves and closes the file when the block exits.
# NOTE: the output path is machine-specific; adjust it for your environment.
with pd.ExcelWriter('/home/soft23/soft23/Akshaya/python/Freq_mining.xlsx') as writer:
    Ar_df.to_excel(
        writer,
        index=False,
        sheet_name='Association_Rule',
        merge_cells=False,
    )
    Tr_df.to_excel(writer, index=False, sheet_name='Basket_prediction')