Introduction to Regression in R
Richie Cotton
Data Evangelist at DataCamp
library(dplyr)
# Keep only the rows of `fish` whose species is "Perch".
perch <- filter(fish, species == "Perch")
species | mass_g | length_cm |
---|---|---|
Perch | 5.9 | 7.5 |
Perch | 32.0 | 12.5 |
Perch | 40.0 | 13.8 |
Perch | 51.5 | 15.0 |
Perch | 70.0 | 15.7 |
... | ... | ... |
# Scatter plot of mass_g against length_cm for perch, with a linear ("lm")
# trend line drawn without its standard-error ribbon.
ggplot(data = perch, mapping = aes(x = length_cm, y = mass_g)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Same plot, but with length_cm cubed on the x-axis.
ggplot(data = perch, mapping = aes(x = length_cm^3, y = mass_g)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Linear regression of mass_g on length_cm cubed. I() makes `^` mean
# arithmetic exponentiation rather than its special formula meaning.
mdl_perch <- lm(
  formula = mass_g ~ I(length_cm^3),
  data = perch
)
Call:
lm(formula = mass_g ~ I(length_cm^3), data = perch)
Coefficients:
(Intercept) I(length_cm^3)
-0.1175 0.0168
# Values of length_cm to predict at: 10 to 40 in steps of 5.
explanatory_data <- tibble(length_cm = seq(from = 10, to = 40, by = 5))
# Add a mass_g column holding mdl_perch's prediction for each length_cm.
prediction_data <- mutate(
  explanatory_data,
  mass_g = predict(mdl_perch, newdata = explanatory_data)
)
# A tibble: 7 x 2
length_cm mass_g
<dbl> <dbl>
1 10 16.7
2 15 56.6
3 20 134.
4 25 262.
5 30 453.
6 35 720.
7 40 1075.
# Mass vs. length cubed, with the rows of prediction_data overlaid as blue
# points. The extra layer inherits the x/y mapping from ggplot().
ggplot(data = perch, mapping = aes(x = length_cm^3, y = mass_g)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(data = prediction_data, color = "blue")
# Mass vs. untransformed length, with the rows of prediction_data overlaid
# as blue points. The extra layer inherits the x/y mapping from ggplot().
ggplot(data = perch, mapping = aes(x = length_cm, y = mass_g)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(data = prediction_data, color = "blue")
spent_usd | n_impressions | n_clicks |
---|---|---|
1.43 | 7350 | 1 |
1.82 | 17861 | 2 |
1.25 | 4259 | 1 |
1.29 | 4133 | 1 |
4.77 | 15615 | 3 |
... | ... | ... |
# Scatter plot of n_impressions against spent_usd, with a linear ("lm")
# trend line drawn without its standard-error ribbon.
ggplot(data = ad_conversion, mapping = aes(x = spent_usd, y = n_impressions)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Same plot with both variables square-root transformed.
ggplot(
  data = ad_conversion,
  mapping = aes(x = sqrt(spent_usd), y = sqrt(n_impressions))
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Linear regression of sqrt(n_impressions) on sqrt(spent_usd); both the
# response and the explanatory variable are transformed inside the formula.
mdl_ad <- lm(
  formula = sqrt(n_impressions) ~ sqrt(spent_usd),
  data = ad_conversion
)
# Values of spent_usd to predict at: 0 to 600 in steps of 100.
explanatory_data <- tibble(spent_usd = seq(from = 0, to = 600, by = 100))
# mdl_ad's response is sqrt(n_impressions), so predict() returns values on
# the square-root scale; squaring them gives n_impressions.
prediction_data <- mutate(
  explanatory_data,
  sqrt_n_impressions = predict(mdl_ad, newdata = explanatory_data),
  n_impressions = sqrt_n_impressions^2
)
# A tibble: 7 x 3
spent_usd sqrt_n_impressions n_impressions
<dbl> <dbl> <dbl>
1 0 15.3 235.
2 100 598. 357289.
3 200 839. 703890.
4 300 1024. 1048771.
5 400 1180. 1392762.
6 500 1318. 1736184.
7 600 1442. 2079202.
Introduction to Regression in R