Anomaly Detection in Python
Bekhruz (Bex) Tuychiev
Kaggle Master, Data Science Content Creator
import pandas as pd
numbers = pd.Series([24, 46, 30, 28, 1289, 25, 21, 31, 48, 47])
numbers_a = pd.Series([24, 46, ...])
numbers_a.mean()
33.33
numbers_a.var()
114.5
numbers_b = pd.Series([1289, 24, ...])
numbers_b.mean()
158.9
numbers_b.var()
157771.65
import pandas as pd
big_mart = pd.read_csv("big_mart.csv")
sales = big_mart['sales']
sales.describe()
count 8523.000000
mean 2181.288914
std 1706.499616
min 33.290000
25% 834.247400
50% 1794.331000
75% 3101.296400
max 13086.964800
import numpy as np
import matplotlib.pyplot as plt

# Find the square root of the length of sales
# (square-root rule of thumb for choosing the number of histogram bins)
n_bins = np.sqrt(len(sales))

# Cast to an integer (np.sqrt returns a float; the bin count must be whole)
n_bins = int(n_bins)

# Plot
plt.figure(figsize=(8, 4))
plt.hist(sales, bins=n_bins, color='red')
integers = range(len(sales))
plt.figure(figsize=(16, 8))
plt.scatter(integers, sales, c='red', alpha=0.5)
Anomaly Detection in Python