Supervised Learning with scikit-learn
George Boorman
Core Curriculum Manager, DataCamp
import pandas as pd
# Load the diabetes dataset from the CSV file into a DataFrame.
diabetes_df = pd.read_csv("diabetes.csv")
# Preview the first rows to check the column names and value types.
print(diabetes_df.head())
pregnancies glucose triceps insulin bmi age diabetes
0 6 148 35 0 33.6 50 1
1 1 85 29 0 26.6 31 0
2 8 183 0 0 23.3 32 1
3 1 89 23 94 28.1 21 0
4 0 137 35 168 43.1 33 1
# Build the feature matrix and the target vector as NumPy arrays.
# Features: every column except "glucose"; target: the "glucose" column.
X = diabetes_df.drop("glucose", axis=1).values
y = diabetes_df["glucose"].values
# Confirm that both objects are NumPy arrays rather than pandas objects.
print(type(X), type(y))
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
# Select a single feature for the regression. With "glucose" dropped, and
# given the column order shown by head() above, index 3 of X is "bmi".
X_bmi = X[:, 3]
# Both arrays are one-dimensional at this point.
print(y.shape, X_bmi.shape)
(752,) (752,)
# Turn the 1-D feature array into a 2-D single-column array
# (-1 lets NumPy infer the number of rows), the layout scikit-learn
# estimators expect for their feature input.
X_bmi = X_bmi.reshape(-1, 1)
print(X_bmi.shape)
(752, 1)
import matplotlib.pyplot as plt
# Scatter plot of the raw data: BMI on the x-axis, blood glucose on the y-axis.
plt.scatter(X_bmi, y)
plt.ylabel("Blood Glucose (mg/dl)")
plt.xlabel("Body Mass Index")
plt.show()
from sklearn.linear_model import LinearRegression
# Fit a simple linear regression of blood glucose on BMI.
reg = LinearRegression()
reg.fit(X_bmi, y)
# Predict on the same BMI values used for fitting so the fitted line
# can be drawn over the observations.
predictions = reg.predict(X_bmi)

# Plot the observations and overlay the fitted regression line.
plt.scatter(X_bmi, y)
plt.plot(X_bmi, predictions)
plt.ylabel("Blood Glucose (mg/dl)")
plt.xlabel("Body Mass Index")
plt.show()
Supervised Learning with scikit-learn