This example shows how to implement uni-variate analysis using Python.
Steps:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
# Apply seaborn's "whitegrid" style to all subsequent plots.
sns.set(style="whitegrid")
import numpy as np
# Load the Iris data set from the UCI repository. Column names are supplied
# explicitly, so read_csv treats the first line of the file as data.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
data = pd.read_csv(url, names=names)
df = pd.DataFrame(data)

# The single variable analysed in the rest of the script.
var = df['sepal-width']
print("Summary of Uni-variate\n", var.describe())

# Random 50-row subset of the data (no seed is set, so it differs per run).
df1 = df.sample(50)

# Number of observations in the series.
count = len(var)

# One-way frequency table: one row per distinct sepal-width value, a single
# column labelled "count", plus an "All" margin. The column label must be a
# descriptive string; passing len(var) would label the column with the number
# of observations instead.
one_way = pd.crosstab(index=var, columns="count", margins=True)
print(one_way)
print("\n")
# Data visualization

# Bar chart of how often each sepal-width value occurs in the 50-row sample.
sns.countplot(x="sepal-width", data=df1)
plt.show()

# Density-normalised histogram of the full series.
plt.hist(var, density=True, bins=30)
plt.show()

# Density histogram with a KDE overlay. distplot is deprecated in recent
# seaborn releases; histplot(kde=True, stat="density") is its documented
# replacement. Fall back to distplot only on installs that predate histplot.
if hasattr(sns, "histplot"):
    sns.histplot(x=var, kde=True, stat="density")
else:
    sns.distplot(var)
plt.show()

# Pass the series as x= explicitly: newer seaborn versions interpret a lone
# positional argument as `data`, not as the x variable.
sns.boxplot(x=var)
plt.show()

# Bar chart of value frequencies over the full series.
sns.countplot(x=var)
plt.show()
# Normality test (Shapiro-Wilk). The null hypothesis is that the sample was
# drawn from a normal distribution.
s, p = stats.shapiro(var)
print("Ststistics:", s)
print("p-value:", p)

# Significance level: a p-value above alpha means the null hypothesis of
# normality is not rejected.
alpha = 0.05
if p > alpha:
    print("Passed normality test")
else:
    print("Fails to pass normality test")
    print("Data does not follows normal distribution")