Anomaly Detection in Python
Bekhruz (Bex) Tuychiev
Kaggle Master, Data Science Content Creator
# Two sample points in 3-D space.
A = np.array([9, 1, 6])
B = np.array([25, 44, 85])

# Euclidean distance: square the coordinate-wise differences, sum them,
# then take the square root of the total.
diffs = (B - A) ** 2
dist_AB = np.sqrt(np.sum(diffs))
print(dist_AB)
91.35644476444998
# Compute the distance between A and B with SciPy's ready-made helper.
from scipy.spatial.distance import euclidean

dist_AB = euclidean(A, B)
dist_AB
91.35644476444998
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()

# Extract feature and target
X = males.drop("weightkg", axis=1)
y = males[['weightkg']]

# Fit
ss.fit(X)

# Transform with the already-fitted scaler and preview the first five rows
X_transformed = ss.transform(X)
X_transformed[:5]
array([[-1.05174523],
[-0.29289108],
[ 1.3446363 ],
[-1.21654894],
[0.056451235]])
# Fit the scaler and transform X in a single fit_transform call.
ss = StandardScaler()
X_transformed = ss.fit_transform(X)
from sklearn.preprocessing import QuantileTransformer

# Init
qt = QuantileTransformer()

# Extract feature and target
X = males.drop("weightkg", axis=1)
y = males[['weightkg']]

# Fit and transform in one call, then inspect the shape of the result
X_transformed = qt.fit_transform(X)
X_transformed.shape
(4082, 94)
# Quantile-transform every feature and write the result back into X in place,
# so X can still be used as a DataFrame (head(), column lookup) afterwards.
qt = QuantileTransformer()
X.loc[:, :] = qt.fit_transform(X)
X.head()
# Histogram of the transformed 'footlength' column.
plt.hist(X['footlength'], color='red')
plt.xlabel("Foot length")
plt.title("Histogram of foot lengths")
plt.show()
# New transformer, this time requesting a normal output distribution.
qt = QuantileTransformer(
output_distribution='normal')
# Rebuild the feature array that the earlier in-place assignment overwrote
X = males.drop("weightkg", axis=1)
X.loc[:, :] = qt.fit_transform(X)
# Histogram of the re-transformed 'footlength' column.
plt.hist(X['footlength'], color='r')
plt.xlabel("Foot length")
plt.title("Histogram of foot lengths")
plt.show()
Anomaly Detection in Python