# Implement a multilayer perceptron for classification in Spark with R using sparklyr
#Load the sparklyr library
library(sparklyr)
# Create a Spark connection
# NOTE(review): the original connection call was lost from this script (only
# the bare name `sc` survived). A local master is assumed here -- confirm the
# master URL for the target environment.
sc <- spark_connect(master = "local")
# Copy data to the Spark environment
# The CSV is read into a local data frame first and then uploaded to Spark as
# the table "car", replacing any existing table with that name.
car_df <- read.csv("/home/soft23/soft23/cardata.csv")
data_s <- sdf_copy_to(sc, car_df, "car", overwrite = TRUE)
# Convert the categorical (string) input columns to numeric label indices.
# Each column <name> gets a companion column <name>_ind; the columns are
# indexed in the same order as before.
categorical_cols <- c(
  "buying", "maint", "lug_boot", "safety", "doors", "persons"
)
for (col_name in categorical_cols) {
  data_s <- ft_string_indexer(
    data_s,
    input_col = col_name,
    output_col = paste0(col_name, "_ind")
  )
}
# Split the data into training and test sets (weights 0.7 / 0.3).
# The fixed seed keeps the random split reproducible between runs.
partitions <- sdf_partition(
  data_s,
  training = 0.7,
  test = 0.3,
  seed = 111
)
train_data <- partitions[["training"]]
test_data <- partitions[["test"]]
# Build the MLP model
# NOTE(review): the original statement that created `mlp_model` was lost from
# this script (only the bare name survived), so the fit below is a
# reconstruction. The response column is assumed to be "class" -- confirm
# against the columns of cardata.csv.
response_col <- "class"
feature_cols <- c(
  "buying_ind", "maint_ind", "doors_ind",
  "persons_ind", "lug_boot_ind", "safety_ind"
)
# The input layer must match the number of features and the output layer the
# number of classes; the class count is read from the training data rather
# than hard-coded. The two hidden layers of 8 units are an arbitrary starting
# point and should be tuned.
n_classes <- length(unique(sdf_read_column(train_data, response_col)))
mlp_model <- ml_multilayer_perceptron_classifier(
  train_data,
  formula = reformulate(feature_cols, response = response_col),
  layers = c(length(feature_cols), 8L, 8L, n_classes)
)
summary(mlp_model)
# Score the test data with the fitted model, then display the result
predictions <- ml_predict(mlp_model, dataset = test_data)
predictions
# Evaluate multiclass metrics (F1, accuracy, weighted precision, weighted
# recall) on the predictions.
# NOTE(review): assumes `predictions` has "label" and "prediction" columns --
# confirm against the output of the prediction step.
metric_names <- c(
  "F1" = "f1",
  "Accuracy" = "accuracy",
  "Precision" = "weightedPrecision",
  "Recall" = "weightedRecall"
)
for (metric_label in names(metric_names)) {
  metric_value <- ml_multiclass_classification_evaluator(
    predictions,
    label_col = "label",
    prediction_col = "prediction",
    metric_name = metric_names[[metric_label]]
  )
  # Same output format as before: "\n<Name> : " followed by the value.
  cat(paste0("\n", metric_label, " : "), metric_value)
}