Monte Carlo Simulations in Python
Izzy Weber
Curriculum Manager, DataCamp
What is the difference in outcomes for people in the first and fourth age quartiles?
# Print the 0.25 and 0.75 quantiles of the "age" column, one value per line.
for level in (0.25, 0.75):
    print(np.quantile(df_summary["age"], level))
39.78584463094688
57.52092642086441
# Age cut-offs: the 0.25 and 0.75 quantiles of the "age" column.
age_q25 = np.quantile(df_summary["age"], 0.25)
age_q75 = np.quantile(df_summary["age"], 0.75)

# Mean predicted outcome for rows strictly above the upper cut-off and
# strictly below the lower cut-off (rows exactly at a cut-off are excluded).
mean_age_q75_outcome = np.mean(
    df_summary.loc[df_summary["age"] > age_q75, "predicted_y"]
)
mean_age_q25_outcome = np.mean(
    df_summary.loc[df_summary["age"] < age_q25, "predicted_y"]
)

# Difference in mean predicted outcome between the two age groups
# (left as a bare expression so an interactive session displays it).
mean_age_q75_outcome - mean_age_q25_outcome
34.09429663553621
# Column labels for the simulated draws.
# NOTE(review): presumably in the same variable order as mean_dia / cov_dia
# and as the features regr_model was fitted on -- confirm against their definitions.
feature_names = ["age", "bmi", "bp", "tc", "ldl", "hdl", "tch", "ltg", "glu"]

# One entry per simulation run: the gap in mean predicted outcome between the
# high-age/high-BMI group and the low-age/low-BMI group.
y_diffs = []
for _ in range(1000):
    # Draw 1000 samples from the multivariate normal defined by mean_dia / cov_dia.
    simulation_results = st.multivariate_normal.rvs(
        mean=mean_dia, size=1000, cov=cov_dia
    )
    df_results = pd.DataFrame(simulation_results, columns=feature_names)

    # Predict an outcome for every simulated row and attach it as "predicted_y".
    predicted_y = regr_model.predict(df_results)
    df_y = pd.DataFrame(predicted_y, columns=["predicted_y"])
    df_sum = pd.concat([df_results, df_y], axis=1)

    # Per-run 0.25 / 0.75 quantile cut-offs for age and BMI.
    age_q25 = np.quantile(df_sum["age"], 0.25)
    age_q75 = np.quantile(df_sum["age"], 0.75)
    bmi_q25 = np.quantile(df_sum["bmi"], 0.25)
    bmi_q75 = np.quantile(df_sum["bmi"], 0.75)

    # Rows must exceed (or fall below) BOTH cut-offs to be in a group.
    high_group = (df_sum["bmi"] > bmi_q75) & (df_sum["age"] > age_q75)
    low_group = (df_sum["bmi"] < bmi_q25) & (df_sum["age"] < age_q25)
    q75_outcome = np.mean(df_sum[high_group]["predicted_y"])
    q25_outcome = np.mean(df_sum[low_group]["predicted_y"])

    y_diff = q75_outcome - q25_outcome
    y_diffs.append(y_diff)
# Average of the collected per-run differences.
np.asarray(y_diffs).mean()
132.4948511247819
# 0.025 quantile of the per-run differences (lower end of a central 95% interval).
np.quantile(y_diffs, q=0.025)
120.73340800299707
# 0.975 quantile of the per-run differences (upper end of a central 95% interval).
np.quantile(y_diffs, q=0.975)
144.1344994507557
# Standard deviation of the per-run differences (NumPy default, ddof=0).
np.asarray(y_diffs).std()
5.9322225537128
# Histogram of the per-run differences.
sns.histplot(data=y_diffs)
Monte Carlo Simulations in Python