Analyzing Survey Data in Python
EbunOluwa Andrew
Data Scientist


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Load the workout survey responses into a DataFrame and preview the first rows.
exercise_data = pd.read_csv('workout_survey_data.csv')
print(exercise_data.head())
| workout_minutes | calories_burned |
|-----------------|-----------------|
| 77              | 79.775152       |
| 21              | 23.177279       |
| 22              | 25.609262       |
| 20              | 17.857388       |
x = independent variable, y = dependent variable
# Pull the predictor (x) and response (y) out of the DataFrame as plain lists.
# Column names follow the header printed by exercise_data.head():
# 'workout_minutes' and 'calories_burned'.
x = exercise_data['workout_minutes'].tolist()
y = exercise_data['calories_burned'].tolist()
print(x,'\n',y)
| [77, 21, 22, 20, 36...           |
|----------------------------------|
| [79.7, 23.1, 25.6, 17.8, 41.8... |
| workout_minutes | calories_burned | 
|---|---|
| 77 | 79.775152 | 
| 21 | 23.177279 | 
| 22 | 25.609262 | 
| 20 | 17.857388 | 
| 36 | 41.849864 | 
# Prepend a constant column to x so the regression can estimate an intercept.
x = sm.add_constant(x)
print(x)

# Build the ordinary least squares model of y on x, fit it, and print the
# full regression report.
model = sm.OLS(y, x)
result = model.fit()
print(result.summary())

# Re-read the raw columns (x was overwritten by add_constant earlier) and
# draw a scatter plot of calories burned against workout length.
# Column names follow the header printed by exercise_data.head().
x = exercise_data['workout_minutes'].tolist()
y = exercise_data['calories_burned'].tolist()
plt.scatter(x, y)
plt.xlabel('minutes')
plt.ylabel('calories')
plt.show()

# Draw the fitted regression line across the observed range of workout lengths.
# Column name follows the header printed by exercise_data.head().
max_x = exercise_data['workout_minutes'].max()
min_x = exercise_data['workout_minutes'].min()

# One point per minute; note that np.arange stops before max_x.
x = np.arange(min_x, max_x, 1)

# Slope and intercept of the fitted line.
# NOTE(review): presumably copied from the OLS summary — confirm.
y = 1.0072 * x + 0.1552

# Pass x explicitly: plt.plot(y) alone would plot against the array index
# (0, 1, 2, ...) and shift the line horizontally by min_x.
plt.plot(x, y, 'r')
plt.show()
# Use the fitted line to predict calories burned for a 30-minute workout.
slope, intercept = 1.0072, 0.1552
workout_length = 30
y = slope * workout_length + intercept
print(y)
30.3712
Analyzing Survey Data in Python