How to find the optimal number of clusters using the NbClust package in R.
Three different methods (silhouette, elbow, gap statistic) can be used
with any partitioning clustering method (k-means, k-medoids, HCUT).
NbClust computes all the indices and determines the
number of clusters in a single function call.
"ward.D", "ward.D2", "single", "complete",
"average" – linkage methods available for hierarchical clustering
# Optimal Number of Clusters ----
# Estimates the number of clusters for a two-column numeric subset of `input`
# using the elbow, silhouette and gap-statistic plots from factoextra, then
# runs NbClust() to evaluate many validity indices in a single call.

# Loading required packages
# install.packages("factoextra")
library("factoextra")
# install.packages("NbClust")
library("NbClust")

# Input dataset
# NOTE(review): the original assignment to `input` was lost (the line read
# `input View(input)`, which is not valid R). `iris` is used as a placeholder
# because columns 3:4 and an elbow at k = 3 are consistent with it -- TODO
# confirm and replace with the intended dataset.
input <- iris
if (interactive()) {
  # View() needs an interactive session; skip it when run as a batch script.
  View(input)
}

# Columns 3 and 4 are the features used by every method below.
features <- input[, 3:4]

# k-means and the gap statistic use random starts; fix the seed so the
# plots and the suggested number of clusters are reproducible.
set.seed(123)

# Elbow method
fviz_nbclust(features, kmeans, method = "wss") +
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Elbow Method")

# Silhouette method
fviz_nbclust(features, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette Method")

# Gap statistic method
# The trailing `+` is required; without it the subtitle is never added.
fviz_nbclust(features, kmeans, method = "gap_stat") +
  labs(subtitle = "Gap Statistic Method")

# NbClust() evaluates many cluster-validity indices at once and reports the
# number of clusters proposed by each, for k between min.nc and max.nc.
NbClust(
  features,
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  distance = "euclidean"
)