Amazing technological breakthrough possible @S-Logix

Office Address

  • 2nd Floor, #7a, High School Road, Secretariat Colony Ambattur, Chennai-600053 (Landmark: SRM School) Tamil Nadu, India
  • +91- 81240 01111

Social List

How to build an ensemble of machine learning classifiers in Python?


To build an ensemble model for improved results using the Voting Classifier technique in Python.


  Load the data set.

  Set the dependent and independent features.

  Split the data into training and testing set.

  Build the base estimators.

  Import Voting Classifier from sklearn library.

  Fit the model to the training data.

  Predict the test data.

  Calculate accuracy, precision and recall

Sample Code

#import libraries
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

#load data
# NOTE(review): machine-specific absolute path kept from the original;
# point it at a local copy of iris.csv before running.
data = pd.read_csv('/home/soft50/soft50/Sathish/practice/iris.csv')

#check missing values (per-column count of nulls)
print("Checking missing values\n\n", data.isnull().sum())

#make it as a data frame
# read_csv already returns a DataFrame; the wrap is kept so that both
# `data` and `df` remain defined for any later code that uses them.
df = pd.DataFrame(data)

#print data shape
print("Shape of data\n\n", df.shape)

#Feature extraction
#Define X and y variable
# assumes the first four columns are the features and the fifth column
# is the class label -- TODO confirm against the CSV layout
X = df.iloc[:, 0:4]
y = df.iloc[:, 4]

#Split train and test data
# test_size=0.3 holds out 30% of the rows, i.e. a 70:30 split;
# random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#training set and testing set
print("Training data set\n\n", X_train, "\n", y_train)
print("Testing data set\n\n", X_test)

#Building the model

#Naive Bayes
# Fit on the training split before predicting. MultinomialNB expects
# non-negative feature values -- TODO confirm this holds for the data set.
naive_bayes = MultinomialNB().fit(X_train, y_train)
y_pred1 = naive_bayes.predict(X_test)

#Evaluate the model
print("Classification report for Naive_bayes\n")
print(classification_report(y_test, y_pred1))
print("Confusion matrix\n")
print(confusion_matrix(y_test, y_pred1))

#Support vector machine
# probability=True makes the fitted SVC expose predict_proba, which a
# soft-voting ensemble needs in order to average class probabilities.
svm = SVC(kernel='linear', probability=True).fit(X_train, y_train)
y_pred2 = svm.predict(X_test)

#Evaluate the model
print("Classification report for SVM\n")
print(classification_report(y_test, y_pred2))
print("Confusion matrix\n")
print(confusion_matrix(y_test, y_pred2))

#Random forest
rf = RandomForestClassifier()
#create a dictionary of all values we want to test for n_estimators
params_rf = {'n_estimators': [50, 100, 200]}
#use gridsearch to test all values for n_estimators (5-fold cross-validation)
rf_gs = GridSearchCV(rf, params_rf, cv=5)
#fit model to training data
# best_estimator_ only exists after the search has been fitted.
rf_gs.fit(X_train, y_train)
rf_best = rf_gs.best_estimator_
y_pred5 = rf_best.predict(X_test)

#Evaluate the model
print("Classification report for Random Forest\n")
print(classification_report(y_test, y_pred5))
print("Confusion matrix\n")
print(confusion_matrix(y_test, y_pred5))

#Score for individual model
# Each classifier's score() is evaluated on the held-out test split.
print("Accuracy score for individual model\n")
print('Naive Bayes: {}'.format(naive_bayes.score(X_test, y_test)))
print('SVM: {}'.format(svm.score(X_test, y_test)))
print('Random forest: {}'.format(rf_best.score(X_test, y_test)))

#create a list of (name, model) pairs for the ensemble
estimators = [('Bayes', naive_bayes), ('svm', svm), ('rf', rf_best)]

#create our voting classifier, inputting our models
# voting='soft' combines the predicted class probabilities of the members,
# so every member must support predict_proba.
ensemble = VotingClassifier(estimators, voting='soft')

#fit model to training data
# The ensemble itself must be fitted before predict/score can be called,
# even though the member models were fitted individually above.
ensemble.fit(X_train, y_train)
y_pred6 = ensemble.predict(X_test)

#Evaluate the model
print("Classification report for Ensemble model\n")
print(classification_report(y_test, y_pred6))
print("Confusion matrix\n")
print(confusion_matrix(y_test, y_pred6))

#test our model on the test data
print("Accuracy score of ensemble model ")
print('Ensemble: {}'.format(ensemble.score(X_test, y_test)))

build an ensemble of machine learning classifiers in python
Load dataset
import GridSearchCV
import classification_report, confusion_matrix, accuracy_score
Checking missing values
make it as a data frame
Training and testing data using 70:30 split
training set and testing set
Building the model