How to do bi-variate analysis in Python?


To implement bi-variate analysis using Python.

Bi-Variate Analysis:

  • Bi-variate Analysis finds out the relationship between two variables.
  • Using bi-variate analysis, we can test for association and disassociation between variables at a pre-defined significance level.
  • We can perform bi-variate analysis for any combination of categorical and continuous variables.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import chi2, chi2_contingency

# Load the data set.
# NOTE(review): the URL was blank in the original listing. The column names
# below match the UCI Iris data file, so that location is assumed here --
# confirm it, or point `url` at a local copy of the CSV.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ["sepal-length", "sepal-width", "petal-length", "petal-width", "class"]
# Column names are supplied explicitly, so the first line of the file is
# read as data rather than as a header row.
data = pd.read_csv(url, names=names)

# read_csv already returns a DataFrame; the wrap is kept so that both
# `data` and `df` stay available to the rest of the script.
df = pd.DataFrame(data)

# First variable of the pair being analysed.
X = df["sepal-length"]

print("X variable\n\n", X.head())

# Second variable of the pair being analysed.
y = df["class"]

print("y variable\n\n", y.head())

# Two-way (contingency) table of sepal-length values against class.
# margins=True appends an "All" row and an "All" column holding the totals.
two_way = pd.crosstab(df["sepal-length"], df["class"], margins=True)
print("Two way cross table\n", two_way)

# Categorical scatter plot. catplot is a figure-level function, so it opens
# its own figure and returns the grid object for it.
ax = sns.catplot(x="sepal-length", y="class", data=df)

# Violin plot for the two-way table.
# A fresh figure stops this from being drawn over the catplot axes, and the
# table is passed as wide-form `data` (one violin per column) because `x`
# expects a single vector or column name, not a whole DataFrame.
plt.figure()
ax = sns.violinplot(data=two_way)
plt.title("Voilen plot for two way cross table")

# Bar chart and scatter plot side by side, sharing the y axis.
fig, axs = plt.subplots(1, 2, figsize=(9, 3), sharey=True)
axs[0].bar(df["sepal-length"], df["class"])
axs[1].scatter(df["sepal-length"], df["class"])
plt.title("Bar chart and scatter plot for categorical variable\n\n\n")

# Count plot: number of rows per sepal-length value, split by class.
# Drawn on its own figure so it does not land on the scatter axes above.
plt.figure()
ax = sns.countplot(x="sepal-length", hue="class", data=df)
plt.title("Count plot for categorical variable")

# Display all figures when run as a plain script.
plt.show()

# Chi-square test of independence between the two tabulated variables.
# chi2 and chi2_contingency are imported from scipy.stats at the top of the file.
print("Chi-square test results\n")

# The test must see only the observed counts. The "All" totals added by
# margins=True leave the statistic unchanged but inflate the degrees of
# freedom, which makes both the critical value and the p-value wrong.
# errors="ignore" keeps this working if the table was built without margins.
observed = two_way.drop(index="All", columns="All", errors="ignore")

# Interpret the test statistic against the critical value.
print("Statistic Interpretation\n")
stat, p, dof, expected = chi2_contingency(observed)
print("Degrees of freedom:", dof)
prob = 0.95
critical = chi2.ppf(prob, dof)
print("probability=%.3f, critical=%.3f, stat=%.3f" % (prob, critical, stat))

if abs(stat) >= critical:
    print("Dependent (reject H0)")
else:
    print("Independent (fail to reject H0)")

# Interpret the p-value against the significance level.
print("p-value Interpretation\n")
alpha = 1.0 - prob
print("significance=%.3f, p=%.3f" % (alpha, p))
if p <= alpha:
    print("Dependent (reject H0 accept H1)")
else:
    print("Independent (fail to reject H0)")

Leave Comment

Your email address will not be published. Required fields are marked *

clear formSubmit