#5, First Floor, 4th Street , Dr. Subbarayan Nagar, Kodambakkam, Chennai-600 024 pro@slogix.in

Office Address

  • #5, First Floor, 4th Street Dr. Subbarayan Nagar Kodambakkam, Chennai-600 024 Landmark : Samiyar Madam
  • pro@slogix.in
  • +91- 81240 01111

Social List

How to do simple credit card fraud detection using logistic regression in Python?

To detect fraudulent transactions using the logistic regression algorithm in Python.


  Read the data set.

  Do data pre-processing.

   Split the data into train and test.

   Build the logistic model.

   Train the classifier.

   Test the classifier with test data.

   Measure the precision, recall, F1-score, and accuracy of the model.

Sample Code

#import libraries
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import classification_report

#default location of the data set; pass another path on the command line to override it
DATA_PATH = '/home/soft23/soft23/Sathish/practice/creditcard.csv'


def split_features_target(data: pd.DataFrame):
    """Split *data* into independent variables and the dependent variable.

    Args:
        data: Frame holding the transactions; it must contain a 'Class'
            column and the feature columns at positions 1 through 29.

    Returns:
        A ``(X, y)`` pair where ``X`` is the frame of columns at positions
        1..29 (the first column is skipped) and ``y`` is the 'Class' column.

    Raises:
        KeyError: If *data* has no 'Class' column.
    """
    #positional slice keeps the original selection: skip column 0, take the next 29
    X = data.iloc[:, 1:30]
    y = data['Class']
    return X, y


def main(csv_path=DATA_PATH):
    """Train a logistic regression fraud classifier and report its quality.

    Reads the data set at *csv_path*, holds out 20% of the rows for testing,
    fits the model, prints the confusion matrix, classification report and
    accuracy, and shows a confusion-matrix heat map and the ROC curve.

    Args:
        csv_path: Path of the CSV file to load.
    """
    #read the data set
    data = pd.read_csv(csv_path)

    #to experiment faster, work on a random sample instead, e.g. data.sample(30000)

    #no of rows and columns
    print("Total rows and columns\n\n", data.shape, "\n")

    #Dependent and independent variable
    X, y = split_features_target(data)

    #total count in each class
    count = y.value_counts()
    print("Total count in each class\n\n", count)

    #splitting the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    #Build the model
    #NOTE(review): features are not scaled, so the default solver may emit a
    #ConvergenceWarning - consider scaling or raising max_iter; TODO confirm
    clf = LogisticRegression()

    # Train the classifier
    clf.fit(X_train, y_train)

    #test the model
    y_pred = clf.predict(X_test)

    #score of the positive class (second column of predict_proba); the ROC
    #curve needs continuous scores, hard 0/1 labels give a single point only
    y_score = clf.predict_proba(X_test)[:, 1]

    #confusion matrix
    cm = metrics.confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:\n\n", cm, "\n")

    #classification report
    print(classification_report(y_test, y_pred))

    #Accuracy score
    #four decimals: with one decimal almost every run would print 1.0
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy score:", round(accuracy, 4))

    #heat map for confusion matrix, drawn on its own axes
    _, ax_cm = plt.subplots(figsize=(7, 4))
    sns.heatmap(cm, annot=True, fmt='d', ax=ax_cm)
    ax_cm.set_xlabel('Predicted')
    ax_cm.set_ylabel('Actual')

    #print the actual and predicted labels
    df1 = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    print(df1.head())

    #ROC curve evaluation
    print("Roc Curve evaluation")
    fpr, tpr, _ = metrics.roc_curve(y_test, y_score)
    auc = metrics.roc_auc_score(y_test, y_score)

    #separate figure so the curve is not painted over the heat map
    _, ax_roc = plt.subplots(figsize=(7, 4))
    ax_roc.plot(fpr, tpr, label="data, auc=" + str(auc))
    ax_roc.set_xlabel('False positive rate')
    ax_roc.set_ylabel('True positive rate')
    ax_roc.legend(loc='lower right')

    plt.show()


if __name__ == "__main__":
    import sys

    #optional first argument: path of the CSV file
    main(sys.argv[1] if len(sys.argv) > 1 else DATA_PATH)