# Finding frequent flyers based on their total miles of travel

#### Description

Identify the frequent flyers of New Zealand airlines so that the airlines can offer special services to these customers and improve their revenue.

#### Process

Read the data set.

Check for missing values.

Compute descriptive statistics.

Select the feature variables.

Use PCA for dimension reduction.

Scale the variables.

Initialize the k-means object.

Fit the clustering object to the X variables.

Plot the result.

#### Sample Code

#importing libraries

import pandas as pd

import pylab as pl

from sklearn.cluster import KMeans

from sklearn.decomposition import PCA

# Read the data set.
# NOTE: the path is machine-specific; point it at your own copy of flyer.xlsx.
data = pd.read_excel('/home/soft23/soft23/Sathish/Pythonfiles/flyer.xlsx')

# Check for missing values (number of nulls in each column).
print("Checking missing values\n")
print(data.isnull().sum())

# Calculate descriptive statistics.
print("Descriptive statistics\n")
print(data.describe())

# Select the variables.
# Y is the 'FFP#' column (presumably the frequent-flyer membership number --
# confirm against the data set); it is only used as a plot axis below.
# X holds the two features that the clustering is built on.
Y = data[['FFP#']]
X = data[['FlyingReturnsMiles', 'FlightTrans']]

# Find a reasonable number of clusters with an elbow curve.
# The candidate models are fitted on X, the same features the final model
# uses; fitting them on the 'FFP#' column would say nothing about how many
# clusters those features contain.
Nc = range(1, 5)
kmeans = [KMeans(n_clusters=i) for i in Nc]
# KMeans.score returns the negative of the k-means objective, so values
# closer to zero indicate a tighter fit.
score = [model.fit(X).score(X) for model in kmeans]

# Plot the elbow curve.
pl.plot(Nc, score, marker='o')
pl.xlabel('Number of Clusters')
pl.ylabel('Score')
pl.title('Elbow Curve')
pl.show()

# Reduce the two features to a single principal component for plotting.
# The fitted PCA can only transform data with the same columns it was fitted
# on, so it is applied to X only; 'FFP#' is plotted as-is.
pca = PCA(n_components=1).fit(X)
pca_c = pca.transform(X)
print("After transform\n", pca_c)

# Build k-means clustering with 4 clusters.
kmeans = KMeans(n_clusters=4)
kmeansoutput = kmeans.fit(X)
print(kmeansoutput)

pl.figure('4 Cluster K-Means')

# Scatter plot: principal component of the features on the x-axis, 'FFP#' on
# the y-axis (matching the axis labels), coloured by assigned cluster.
pl.scatter(pca_c[:, 0], Y['FFP#'], c=kmeansoutput.labels_, s=200)
pl.xlabel('FlyingReturnsMiles & FlightTrans')
pl.ylabel('FFP#')
pl.title('4 Cluster K-Means')
pl.show()