Predict respondents' yearly earnings using survey data from 1990. See Chapter 15 in Regression and Other Stories.
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("ggplot2")
library("bayesplot")
theme_set(bayesplot::theme_default(base_family = "sans"))
earnings <- read.csv(root("Earnings/data","earnings.csv"))
head(earnings)
height weight male earn earnk ethnicity education mother_education
1 74 210 1 50000 50 White 16 16
2 66 125 0 60000 60 White 16 16
3 64 126 0 30000 30 White 16 16
4 65 200 0 25000 25 White 17 17
5 63 110 0 50000 50 Other 16 16
6 68 165 0 62000 62 Black 18 18
father_education walk exercise smokenow tense angry age
1 16 3 3 2 0 0 45
2 16 6 5 1 0 0 58
3 16 8 1 2 1 1 29
4 NA 8 1 2 0 0 57
5 16 5 6 2 0 0 91
6 18 1 1 2 2 2 54
fit_2a <- stan_glm((earn>0) ~ height + male,
family = binomial(link = "logit"),
data = earnings, refresh = 0)
sims_2a <- as.matrix(fit_2a)
print(fit_2a, digits=2)
stan_glm
family: binomial [logit]
formula: (earn > 0) ~ height + male
observations: 1816
predictors: 3
------
Median MAD_SD
(Intercept) -2.95 1.92
height 0.07 0.03
male 1.67 0.32
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fit_2b <- stan_glm(log(earn) ~ height + male,
data = earnings, subset = earn>0,
refresh = 0)
sims_2b <- as.matrix(fit_2b)
print(fit_2b, digits=2)
stan_glm
family: gaussian [identity]
formula: log(earn) ~ height + male
observations: 1629
predictors: 3
------
Median MAD_SD
(Intercept) 7.97 0.51
height 0.02 0.01
male 0.37 0.06
Auxiliary parameter(s):
Median MAD_SD
sigma 0.87 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
new <- data.frame(height = 68, male = 0, earnings$earn>0)
pred_2a <- posterior_predict(fit_2a, newdata=new)
pred_2b <- posterior_predict(fit_2b, newdata=new)
pred <- ifelse(pred_2a == 1, exp(pred_2b), 0)