Linear regression and Bayes-R2 and LOO-R2. See Chapter 11 in Regression and Other Stories.
See also Gelman, Goodrich, Gabry, and Vehtari (2018). R-squared for Bayesian regression models. The American Statistician 73:307-309, doi:10.1080/00031305.2018.1549100
Load packages
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("loo")
library("foreign")
library("ggplot2")
theme_set(bayesplot::theme_default(base_family = "sans"))
library(reshape2)
# for reproducability
SEED<-1507
Load children's test scores data
kidiq <- read.csv(root("KidIQ/data","kidiq.csv"))
head(kidiq)
kid_score mom_hs mom_iq mom_work mom_age
1 65 1 121.11753 4 27
2 98 1 89.36188 4 25
3 85 1 115.44316 4 27
4 83 1 99.44964 3 25
5 115 1 92.74571 4 27
6 98 0 107.90184 1 18
Compare different models
A single binary predictor
fit_1 <- stan_glm(kid_score ~ mom_hs, data=kidiq,
seed=SEED, refresh=0)
loo_1 <- loo(fit_1)
print(loo_1)
Computed from 4000 by 434 log-likelihood matrix
Estimate SE
elpd_loo -1914.8 13.8
p_loo 3.1 0.3
looic 3829.6 27.6
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
Two predictors
fit_3 <- stan_glm(kid_score ~ mom_hs + mom_iq, data=kidiq,
seed=SEED, refresh=0)
loo_3 <- loo(fit_3)
print(loo_3)
Computed from 4000 by 434 log-likelihood matrix
Estimate SE
elpd_loo -1876.1 14.2
p_loo 4.0 0.4
looic 3752.1 28.5
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
Compare models based on expected log predictive density
loo_compare(loo_1, loo_3)
elpd_diff se_diff
fit_3 0.0 0.0
fit_1 -38.7 8.3
Bayes-R2
Median Bayes-R2 increases
br2_1<-bayes_R2(fit_1)
br2_3<-bayes_R2(fit_3)
round(median(br2_1),3)
[1] 0.056
round(median(br2_3),3)
[1] 0.215
Plot Bayes-R2 posteriors
Increase in R2 is clear
df <- melt(data.frame(fit_3=br2_1,fit_3n=br2_3))
ggplot(df, aes(x=value, linetype=variable)) +
geom_density(alpha=0.25, show.legend=FALSE) +
labs(x="Bayes-R^2", y="") +
scale_y_continuous(breaks=NULL) +
annotate("text", x = 0.107, y = 16.2, label = "kid_score ~ mom_hs") +
annotate("text", x = 0.28, y = 13.2, label = "kid_score ~ mom_hs + mom_iq")
R2 with LOO-CV
LOO-R2 are smaller, but the difference is still large.
round(median(loo_R2(fit_1)),3)
[1] 0.049
round(median(loo_R2(fit_3)),3)
[1] 0.205
Add five pure noise predictors to the data
set.seed(SEED)
n=nrow(kidiq)
kidiqr <- kidiq
kidiqr$noise <- array(rnorm(5*n), c(n,5))
Linear regression with additional noise predictors
fit_3n <- stan_glm(kid_score ~ mom_hs + mom_iq + noise, data=kidiqr,
seed=SEED, refresh=0)
Linear regression with interaction
fit_4 <- stan_glm(kid_score ~ mom_hs + mom_iq + mom_iq:mom_hs, data=kidiq,
seed=SEED, refresh=0)
Bayes-R2
Median Bayes-R2 increases, but...
br2_3<-bayes_R2(fit_3)
br2_3n<-bayes_R2(fit_3n)
round(median(br2_3),3)
[1] 0.215
round(median(br2_3n),3)
[1] 0.224
Plot Bayes-R2 posteriors
Median Bayes-R2 increases, but that increase is negligible compared to the uncertainty
df <- melt(data.frame(fit_3=br2_3,fit_3n=br2_3n))
ggplot(df, aes(x=value, linetype=variable)) +
geom_density(alpha=0.25, show.legend=FALSE) +
labs(x="Bayes-R^2", y="") +
scale_y_continuous(breaks=NULL) +
annotate("text", x = 0.15, y = 11.5, label = "kid_score ~ mom_hs + mom_iq") +
annotate("text", x = 0.285, y = 12, label = "kid_score ~ mom_hs + mom_iq +\n noise")
R2 with LOO-CV
LOO-R2 decreases when five noise predictors are addded
round(median(loo_R2(fit_3)),3)
[1] 0.205
round(median(loo_R2(fit_3n)),3)
[1] 0.191
LOO-R2 increases when interaction is addded
round(median(loo_R2(fit_4)),2)
[1] 0.22
LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogS2lkSVEgQmF5ZXMtUjIgYW5kIExPTy1SMiIKYXV0aG9yOiAiQW5kcmV3IEdlbG1hbiwgSmVubmlmZXIgSGlsbCwgQWtpIFZlaHRhcmkiCmRhdGU6ICJgciBmb3JtYXQoU3lzLkRhdGUoKSlgIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRoZW1lOiByZWFkYWJsZQogICAgdG9jOiB0cnVlCiAgICB0b2NfZGVwdGg6IDIKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KTGluZWFyIHJlZ3Jlc3Npb24gYW5kIEJheWVzLVIyIGFuZCBMT08tUjIuIFNlZSBDaGFwdGVyIDExIGluClJlZ3Jlc3Npb24gYW5kIE90aGVyIFN0b3JpZXMuCgpTZWUgYWxzbyBHZWxtYW4sIEdvb2RyaWNoLCBHYWJyeSwgYW5kIFZlaHRhcmkgKDIwMTgpLiBSLXNxdWFyZWQgZm9yCkJheWVzaWFuIHJlZ3Jlc3Npb24gbW9kZWxzLiBUaGUgQW1lcmljYW4gU3RhdGlzdGljaWFuIDczOjMwNy0zMDksCltkb2k6MTAuMTA4MC8wMDAzMTMwNS4yMDE4LjE1NDkxMDBdKGh0dHBzOi8vZG9pLm9yZy8xMC4xMDgwLzAwMDMxMzA1LjIwMTguMTU0OTEwMCkKCi0tLS0tLS0tLS0tLS0KCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KG1lc3NhZ2U9RkFMU0UsIGVycm9yPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBjb21tZW50PU5BKQpgYGAKCiMjIyMgTG9hZCBwYWNrYWdlcwoKYGBge3IgfQpsaWJyYXJ5KCJycHJvanJvb3QiKQpyb290PC1oYXNfZmlsZSgiLlJPUy1FeGFtcGxlcy1yb290IikkbWFrZV9maXhfZmlsZSgpCmxpYnJhcnkoInJzdGFuYXJtIikKbGlicmFyeSgibG9vIikKbGlicmFyeSgiZm9yZWlnbiIpCmxpYnJhcnkoImdncGxvdDIiKQp0aGVtZV9zZXQoYmF5ZXNwbG90Ojp0aGVtZV9kZWZhdWx0KGJhc2VfZmFtaWx5ID0gInNhbnMiKSkKbGlicmFyeShyZXNoYXBlMikKIyBmb3IgcmVwcm9kdWNhYmlsaXR5ClNFRUQ8LTE1MDcKYGBgCgojIyMjIExvYWQgY2hpbGRyZW4ncyB0ZXN0IHNjb3JlcyBkYXRhCgpgYGB7ciB9CmtpZGlxIDwtIHJlYWQuY3N2KHJvb3QoIktpZElRL2RhdGEiLCJraWRpcS5jc3YiKSkKaGVhZChraWRpcSkKYGBgCgojIyBDb21wYXJlIGRpZmZlcmVudCBtb2RlbHMKCiMjIyMgQSBzaW5nbGUgYmluYXJ5IHByZWRpY3RvcgoKYGBge3IgfQpmaXRfMSA8LSBzdGFuX2dsbShraWRfc2NvcmUgfiBtb21faHMsIGRhdGE9a2lkaXEsCiAgICAgICAgICAgICAgICAgIHNlZWQ9U0VFRCwgcmVmcmVzaD0wKQpsb29fMSA8LSBsb28oZml0XzEpCnByaW50KGxvb18xKQpgYGAKCiMjIyMgVHdvIHByZWRpY3RvcnMKCmBgYHtyIH0KZml0XzMgPC0gc3Rhbl9nbG0oa2lkX3Njb3JlIH4gbW9tX2hzICsgbW9tX2lxLCBkYXRhPWtpZGlxLAogICAgICAgICAgICAgICAgICBzZWVkPVNFRUQsIHJlZnJlc2g9MCkKbG9vXzMgPC0gbG9vKGZpdF8zKQpwcmludChsb29fMykKYGBgCgojIyMjIENvbXBhcmUgbW9kZWxzIGJhc2VkIG9uIGV4cGVjdGVkIGxvZyBwcmVkaWN0aXZlIGRlbnNpdHkKCmBgYHtyIH0KbG9vX2NvbXBhcmUobG9vXzEsIGxvb18zKQpgYGAKCiMjIyMgQmF5ZXMtUjI8YnI+Ck1lZGlhbiBCYXllcy1SMiBpbmNyZWFzZXMKCmBgYHtyIH0KYnIyXzE8LWJheWVzX1IyKGZpdF8xKQpicjJfMzwtYmF5ZXNfUjIoZml0XzMpCnJvdW5kKG1lZGlhbihicjJfMSksMykKcm91bmQobWVkaWFuKGJyMl8zKSwzKQpgYGAKCiMjIyMgUGxvdCBCYXllcy1SMiBwb3N0ZXJpb3JzPGJyPgpJbmNyZWFzZSBpbiBSMiBpcyBjbGVhcgoKYGBge3IgfQpkZiA8LSBtZWx0KGRhdGEuZnJhbWUoZml0XzM9YnIyXzEsZml0XzNuPWJyMl8zKSkKZ2dwbG90KGRmLCBhZXMoeD12YWx1ZSwgbGluZXR5cGU9dmFyaWFibGUpKSArCiAgICBnZW9tX2RlbnNpdHkoYWxwaGE9MC4yNSwgc2hvdy5sZWdlbmQ9RkFMU0UpICsKICAgIGxhYnMoeD0iQmF5ZXMtUl4yIiwgeT0iIikgKwogICAgc2NhbGVfeV9jb250aW51b3VzKGJyZWFrcz1OVUxMKSArIAogICAgYW5ub3RhdGUoInRleHQiLCB4ID0gMC4xMDcsIHkgPSAxNi4yLCBsYWJlbCA9ICJraWRfc2NvcmUgfiBtb21faHMiKSArCiAgICBhbm5vdGF0ZSgidGV4dCIsIHggPSAwLjI4LCB5ID0gMTMuMiwgbGFiZWwgPSAia2lkX3Njb3JlIH4gbW9tX2hzICsgbW9tX2lxIikKYGBgCgojIyMjIFIyIHdpdGggTE9PLUNWPGJyPgpMT08tUjIgYXJlIHNtYWxsZXIsIGJ1dCB0aGUgZGlmZmVyZW5jZSBpcyBzdGlsbCBsYXJnZS4KCmBgYHtyIH0Kcm91bmQobWVkaWFuKGxvb19SMihmaXRfMSkpLDMpCnJvdW5kKG1lZGlhbihsb29fUjIoZml0XzMpKSwzKQpgYGAKCiMjIEFkZCBmaXZlIHB1cmUgbm9pc2UgcHJlZGljdG9ycyB0byB0aGUgZGF0YQoKYGBge3IgfQpzZXQuc2VlZChTRUVEKQpuPW5yb3coa2lkaXEpCmtpZGlxciA8LSBraWRpcQpraWRpcXIkbm9pc2UgPC0gYXJyYXkocm5vcm0oNSpuKSwgYyhuLDUpKQpgYGAKCiMjIyMgTGluZWFyIHJlZ3Jlc3Npb24gd2l0aCBhZGRpdGlvbmFsIG5vaXNlIHByZWRpY3RvcnMKCmBgYHtyIHJlc3VsdHM9J2hpZGUnfQpmaXRfM24gPC0gc3Rhbl9nbG0oa2lkX3Njb3JlIH4gbW9tX2hzICsgbW9tX2lxICsgbm9pc2UsIGRhdGE9a2lkaXFyLAogICAgICAgICAgICAgICAgICAgc2VlZD1TRUVELCByZWZyZXNoPTApCmBgYAoKIyMjIyBMaW5lYXIgcmVncmVzc2lvbiB3aXRoIGludGVyYWN0aW9uCgpgYGB7ciByZXN1bHRzPSdoaWRlJ30KZml0XzQgPC0gc3Rhbl9nbG0oa2lkX3Njb3JlIH4gbW9tX2hzICsgbW9tX2lxICsgbW9tX2lxOm1vbV9ocywgZGF0YT1raWRpcSwKICAgICAgICAgICAgICAgICAgc2VlZD1TRUVELCByZWZyZXNoPTApCmBgYAoKIyMjIyBCYXllcy1SMjxicj4KTWVkaWFuIEJheWVzLVIyIGluY3JlYXNlcywgYnV0Li4uCgpgYGB7ciB9CmJyMl8zPC1iYXllc19SMihmaXRfMykKYnIyXzNuPC1iYXllc19SMihmaXRfM24pCnJvdW5kKG1lZGlhbihicjJfMyksMykKcm91bmQobWVkaWFuKGJyMl8zbiksMykKYGBgCgojIyMjIFBsb3QgQmF5ZXMtUjIgcG9zdGVyaW9yczxicj4KTWVkaWFuIEJheWVzLVIyIGluY3JlYXNlcywgYnV0IHRoYXQgaW5jcmVhc2UgaXMgbmVnbGlnaWJsZQpjb21wYXJlZCB0byB0aGUgdW5jZXJ0YWludHkKCmBgYHtyIH0KZGYgPC0gbWVsdChkYXRhLmZyYW1lKGZpdF8zPWJyMl8zLGZpdF8zbj1icjJfM24pKQpnZ3Bsb3QoZGYsIGFlcyh4PXZhbHVlLCBsaW5ldHlwZT12YXJpYWJsZSkpICsKICAgIGdlb21fZGVuc2l0eShhbHBoYT0wLjI1LCBzaG93LmxlZ2VuZD1GQUxTRSkgKwogICAgbGFicyh4PSJCYXllcy1SXjIiLCB5PSIiKSArCiAgICBzY2FsZV95X2NvbnRpbnVvdXMoYnJlYWtzPU5VTEwpICsgCiAgICBhbm5vdGF0ZSgidGV4dCIsIHggPSAwLjE1LCB5ID0gMTEuNSwgbGFiZWwgPSAia2lkX3Njb3JlIH4gbW9tX2hzICsgbW9tX2lxIikgKwogICAgYW5ub3RhdGUoInRleHQiLCB4ID0gMC4yODUsIHkgPSAxMiwgbGFiZWwgPSAia2lkX3Njb3JlIH4gbW9tX2hzICsgbW9tX2lxICtcbiBub2lzZSIpCmBgYAoKIyMjIyBSMiB3aXRoIExPTy1DVjxicj4KTE9PLVIyIGRlY3JlYXNlcyB3aGVuIGZpdmUgbm9pc2UgcHJlZGljdG9ycyBhcmUgYWRkZGVkCgpgYGB7ciB9CnJvdW5kKG1lZGlhbihsb29fUjIoZml0XzMpKSwzKQpyb3VuZChtZWRpYW4obG9vX1IyKGZpdF8zbikpLDMpCmBgYAoKTE9PLVIyIGluY3JlYXNlcyB3aGVuIGludGVyYWN0aW9uIGlzIGFkZGRlZAoKYGBge3IgfQpyb3VuZChtZWRpYW4obG9vX1IyKGZpdF80KSksMikKYGBgCgo=