Example where an informative prior makes a difference. See Chapter 9 in Regression and Other Stories.
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("arm")
library("rstanarm")
x <- seq(-2,2,1)
y <- c(50, 44, 50, 47, 56)
sexratio <- data.frame(x, y)
theta_hat_prior <- 0
se_prior <- 0.25
theta_hat_data <- 8
se_data <- 3
theta_hat_bayes <- (theta_hat_prior/se_prior^2 + theta_hat_data/se_data^2)/(1/se_prior^2 + 1/se_data^2)
se_bayes <- sqrt(1/(1/se_prior^2 + 1/se_data^2))
fit <- lm(y ~ x, data = sexratio)
display(fit)
lm(formula = y ~ x, data = sexratio)
coef.est coef.se
(Intercept) 49.40 1.94
x 1.50 1.37
---
n = 5, k = 2
residual sd = 4.35, R-Squared = 0.28
par(mfrow=c(1,2), mar=c(3,3,3,2), mgp=c(1.7,.5,0), tck=-.01)
plot(x, y, ylim=c(43, 57), xlab="Attractiveness of parent", ylab="Percentage of girl babies", bty="l", yaxt="n", main="Data on beauty and sex ratio", pch=19, cex=1)
axis(2, c(45,50,55), paste(c(45,50,55), "%", sep=""))
plot(x, y, ylim=c(43, 57), xlab="Attractiveness of parent", ylab="Percentage of girl babies", bty="l", yaxt="n", main="Data and least-squares regression line", pch=19, cex=1)
axis(2, c(45,50,55), paste(c(45,50,55), "%", sep=""))
abline(coef(fit)[1], coef(fit)[2])
text(1, 52.2, paste("y = ", fround(coef(fit)[1], 1), " + ", fround(coef(fit)[2], 1), " x\n(Std err of slope is ", fround(se.coef(fit)[2], 1), ")", sep=""))
fit_default <- stan_glm(y ~ x, data = sexratio, refresh = 0)
prior_summary(fit_default)
Priors for model 'fit_default'
------
Intercept (after predictors centered)
Specified prior:
~ normal(location = 49, scale = 2.5)
Adjusted prior:
~ normal(location = 49, scale = 11)
Coefficients
Specified prior:
~ normal(location = 0, scale = 2.5)
Adjusted prior:
~ normal(location = 0, scale = 7)
Auxiliary (sigma)
Specified prior:
~ exponential(rate = 1)
Adjusted prior:
~ exponential(rate = 0.22)
------
See help('prior_summary.stanreg') for more details
print(fit_default)
stan_glm
family: gaussian [identity]
formula: y ~ x
observations: 5
predictors: 2
------
Median MAD_SD
(Intercept) 49.4 2.0
x 1.4 1.4
Auxiliary parameter(s):
Median MAD_SD
sigma 4.6 1.7
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fit_post <- stan_glm(y ~ x, data = sexratio,
prior = normal(0, 0.2),
prior_intercept = normal(48.8, 0.5),
refresh = 0)
prior_summary(fit_post)
Priors for model 'fit_post'
------
Intercept (after predictors centered)
~ normal(location = 49, scale = 0.5)
Coefficients
~ normal(location = 0, scale = 0.2)
Auxiliary (sigma)
Specified prior:
~ exponential(rate = 1)
Adjusted prior:
~ exponential(rate = 0.22)
------
See help('prior_summary.stanreg') for more details
print(fit_post)
stan_glm
family: gaussian [identity]
formula: y ~ x
observations: 5
predictors: 2
------
Median MAD_SD
(Intercept) 48.8 0.5
x 0.0 0.2
Auxiliary parameter(s):
Median MAD_SD
sigma 4.3 1.3
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
par(mfrow=c(2,2), mar=c(5,3,3,2), mgp=c(1.7,.5,0), tck=-.01)
fit_bayes <- list(as.data.frame(fit_default), as.data.frame(fit_post))
for (k in 1:2){
sims <- fit_bayes[[k]]
coef_est <- apply(sims, 2, median)
b_se <- 1.483*median(abs(sims[,2] - median(sims[,2])))
plot(sims[,1], sims[,2], xlim=range(fit_bayes[[1]][,1], fit_bayes[[2]][,1]), ylim=range(fit_bayes[[1]][,2], fit_bayes[[2]][,2]), xlab="Intercept, a", ylab="Slope, b", main=paste("Posterior simulations under", if (k==1) "default prior" else "informative prior"), pch=19, cex=.2, bty="l")
plot(x, y, ylim=c(43, 57), xlab="Attractiveness of parent", ylab="Percentage of girl babies", bty="l", yaxt="n", main=if (k==1) "Least-squares regression line and\nposterior uncertainty given default prior" else "Bayes estimated regression line and\nposterior uncertainty given informative prior", pch=19, cex=1)
axis(2, c(45,50,55), paste(c(45,50,55), "%", sep=""))
for (i in 1:100){
abline(sims[i,1], sims[i,2], lwd=.5, col="gray50")
}
abline(coef_est[1], coef_est[2], lwd=2)
}