Plot residuals vs. predicted values, or residuals vs. observed values? See Chapter 11 in Regression and Other Stories.
Load packages
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
Load data
introclass <- read.table(root("Introclass/data","gradesW4315.dat"), header=TRUE)
head(introclass)
hw1 hw2 hw3 hw4 midterm hw5 hw6 hw7 final
1 95 88 100 95 80 96 99 0 103
2 0 74 74 0 53 83 97 0 79
3 100 0 105 100 91 96 100 96 122
4 0 90 76 100 63 91 95 0 78
5 100 96 99 100 91 93 100 92 135
6 90 83 95 100 73 89 100 90 117
Linear regression model
The option refresh = 0
supresses the default Stan sampling progress output. This is useful for small data with fast computation. For more complex models and bigger data, it can be useful to see the progress.
fit_1 <- stan_glm(final ~ midterm, data = introclass, refresh = 0)
print(fit_1)
stan_glm
family: gaussian [identity]
formula: final ~ midterm
observations: 52
predictors: 2
------
Median MAD_SD
(Intercept) 65.4 17.6
midterm 0.7 0.2
Auxiliary parameter(s):
Median MAD_SD
sigma 14.9 1.5
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
Compute residuals
compute predictions from simulations
sims <- as.matrix(fit_1)
predicted <- colMeans(sims[,1] + sims[,2] %*% t(introclass$midterm))
or with built-in function
predicted <- predict(fit_1)
resid <- introclass$final - predicted
Plot residuals vs predicted
plot(predicted, resid, xlab="predicted value", ylab="residual",
main="Residuals vs.\ predicted values", mgp=c(1.5,.5,0), pch=20, yaxt="n")
axis(2, seq(-40,40,20), mgp=c(1.5,.5,0))
abline(0, 0, col="gray", lwd=.5)
Plot residuals vs observed
plot(introclass$final, resid, xlab="observed value", ylab="residual", main="Residuals vs.\ observed values", mgp=c(1.5,.5,0), pch=20, yaxt="n")
axis(2, seq(-40,40,20), mgp=c(1.5,.5,0))
abline(0, 0, col="gray", lwd=.5)
Simulated fake data
a <- 65
b <- 0.7
sigma <- 15
n <- nrow(introclass)
introclass$final_fake <- a + b*introclass$midterm + rnorm(n, 0, 15)
fit_fake <- stan_glm(final_fake ~ midterm, data = introclass, refresh = 0)
Compute residuals
compute predictions from simulations
sims <- as.matrix(fit_fake)
predicted_fake <- colMeans(sims[,1] + sims[,2] %*% t(introclass$midterm))
or with built-in function
predicted_fake <- predict(fit_fake)
resid_fake <- introclass$final_fake - predicted_fake
Plot residuals vs predicted
plot(predicted_fake, resid_fake, xlab="predicted value", ylab="residual", main="Fake data: resids vs.\ predicted", mgp=c(1.5,.5,0), pch=20, yaxt="n")
axis(2, seq(-40,40,20), mgp=c(1.5,.5,0))
abline(0, 0, col="gray", lwd=.5)
Plot residuals vs observed
plot(introclass$final_fake, resid_fake, xlab="observed value", ylab="residual", main="Fake data: resids vs.\ observed", mgp=c(1.5,.5,0), pch=20, yaxt="n")
axis(2, seq(-40,40,20), mgp=c(1.5,.5,0))
abline(0, 0, col="gray", lwd=.5)
LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogSW50cm9jbGFzcyIKYXV0aG9yOiAiQW5kcmV3IEdlbG1hbiwgSmVubmlmZXIgSGlsbCwgQWtpIFZlaHRhcmkiCmRhdGU6ICJgciBmb3JtYXQoU3lzLkRhdGUoKSlgIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRoZW1lOiByZWFkYWJsZQogICAgdG9jOiB0cnVlCiAgICB0b2NfZGVwdGg6IDIKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KUGxvdCByZXNpZHVhbHMgdnMuXCBwcmVkaWN0ZWQgdmFsdWVzLCBvciByZXNpZHVhbHMgdnMuXCBvYnNlcnZlZAp2YWx1ZXM/IFNlZSBDaGFwdGVyIDExIGluIFJlZ3Jlc3Npb24gYW5kIE90aGVyIFN0b3JpZXMuCgotLS0tLS0tLS0tLS0tCgoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRSwgY29tbWVudD1OQSkKIyBzd2l0Y2ggdGhpcyB0byBUUlVFIHRvIHNhdmUgZmlndXJlcyBpbiBzZXBhcmF0ZSBmaWxlcwpzYXZlZmlncyA8LSBGQUxTRQpgYGAKCiMjIyMgTG9hZCBwYWNrYWdlcwoKYGBge3IgfQpsaWJyYXJ5KCJycHJvanJvb3QiKQpyb290PC1oYXNfZmlsZSgiLlJPUy1FeGFtcGxlcy1yb290IikkbWFrZV9maXhfZmlsZSgpCmxpYnJhcnkoInJzdGFuYXJtIikKYGBgCgojIyMjIExvYWQgZGF0YQoKYGBge3IgfQppbnRyb2NsYXNzIDwtIHJlYWQudGFibGUocm9vdCgiSW50cm9jbGFzcy9kYXRhIiwiZ3JhZGVzVzQzMTUuZGF0IiksIGhlYWRlcj1UUlVFKQpoZWFkKGludHJvY2xhc3MpCmBgYAoKIyMgTGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwKClRoZSBvcHRpb24gYHJlZnJlc2ggPSAwYCBzdXByZXNzZXMgdGhlIGRlZmF1bHQgU3RhbiBzYW1wbGluZwpwcm9ncmVzcyBvdXRwdXQuIFRoaXMgaXMgdXNlZnVsIGZvciBzbWFsbCBkYXRhIHdpdGggZmFzdApjb21wdXRhdGlvbi4gRm9yIG1vcmUgY29tcGxleCBtb2RlbHMgYW5kIGJpZ2dlciBkYXRhLCBpdCBjYW4gYmUKdXNlZnVsIHRvIHNlZSB0aGUgcHJvZ3Jlc3MuCgpgYGB7ciB9CmZpdF8xIDwtIHN0YW5fZ2xtKGZpbmFsIH4gbWlkdGVybSwgZGF0YSA9IGludHJvY2xhc3MsIHJlZnJlc2ggPSAwKQpwcmludChmaXRfMSkKYGBgCgojIyMjIENvbXB1dGUgcmVzaWR1YWxzPGJyPgpjb21wdXRlIHByZWRpY3Rpb25zIGZyb20gc2ltdWxhdGlvbnMKCmBgYHtyIH0Kc2ltcyA8LSBhcy5tYXRyaXgoZml0XzEpCnByZWRpY3RlZCA8LSBjb2xNZWFucyhzaW1zWywxXSArIHNpbXNbLDJdICUqJSB0KGludHJvY2xhc3MkbWlkdGVybSkpCmBgYAoKb3Igd2l0aCBidWlsdC1pbiBmdW5jdGlvbgoKYGBge3IgfQpwcmVkaWN0ZWQgPC0gcHJlZGljdChmaXRfMSkKcmVzaWQgPC0gaW50cm9jbGFzcyRmaW5hbCAtIHByZWRpY3RlZApgYGAKCiMjIyMgUGxvdCByZXNpZHVhbHMgdnMgcHJlZGljdGVkCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBvc3RzY3JpcHQocm9vdCgiSW50cm9jbGFzcy9maWdzIiwiZmFrZXJlc2lkMWEucHMiKSwgaGVpZ2h0PTMuOCwgd2lkdGg9NC41LCBjb2xvcm1vZGVsPSJncmF5IikKYGBgCmBgYHtyIH0KcGxvdChwcmVkaWN0ZWQsIHJlc2lkLCB4bGFiPSJwcmVkaWN0ZWQgdmFsdWUiLCB5bGFiPSJyZXNpZHVhbCIsCiAgICAgbWFpbj0iUmVzaWR1YWxzIHZzLlwgcHJlZGljdGVkIHZhbHVlcyIsIG1ncD1jKDEuNSwuNSwwKSwgcGNoPTIwLCB5YXh0PSJuIikKYXhpcygyLCBzZXEoLTQwLDQwLDIwKSwgbWdwPWMoMS41LC41LDApKQphYmxpbmUoMCwgMCwgY29sPSJncmF5IiwgbHdkPS41KQpgYGAKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KaWYgKHNhdmVmaWdzKSBkZXYub2ZmKCkKYGBgCgojIyMjIFBsb3QgcmVzaWR1YWxzIHZzIG9ic2VydmVkCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBvc3RzY3JpcHQocm9vdCgiSW50cm9jbGFzcy9maWdzIiwiZmFrZXJlc2lkMWIucHMiKSwgaGVpZ2h0PTMuOCwgd2lkdGg9NC41LCBjb2xvcm1vZGVsPSJncmF5IikKYGBgCmBgYHtyIH0KcGxvdChpbnRyb2NsYXNzJGZpbmFsLCByZXNpZCwgeGxhYj0ib2JzZXJ2ZWQgdmFsdWUiLCB5bGFiPSJyZXNpZHVhbCIsIG1haW49IlJlc2lkdWFscyB2cy5cIG9ic2VydmVkIHZhbHVlcyIsIG1ncD1jKDEuNSwuNSwwKSwgcGNoPTIwLCB5YXh0PSJuIikKYXhpcygyLCBzZXEoLTQwLDQwLDIwKSwgbWdwPWMoMS41LC41LDApKQphYmxpbmUoMCwgMCwgY29sPSJncmF5IiwgbHdkPS41KQpgYGAKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KaWYgKHNhdmVmaWdzKSBkZXYub2ZmKCkKYGBgCgojIyBTaW11bGF0ZWQgZmFrZSBkYXRhCgpgYGB7ciB9CmEgPC0gNjUKYiA8LSAwLjcKc2lnbWEgPC0gMTUKbiA8LSBucm93KGludHJvY2xhc3MpCmludHJvY2xhc3MkZmluYWxfZmFrZSA8LSBhICsgYippbnRyb2NsYXNzJG1pZHRlcm0gKyBybm9ybShuLCAwLCAxNSkKZml0X2Zha2UgPC0gc3Rhbl9nbG0oZmluYWxfZmFrZSB+IG1pZHRlcm0sIGRhdGEgPSBpbnRyb2NsYXNzLCByZWZyZXNoID0gMCkKYGBgCgojIyMjIENvbXB1dGUgcmVzaWR1YWxzCmNvbXB1dGUgcHJlZGljdGlvbnMgZnJvbSBzaW11bGF0aW9ucwoKYGBge3IgfQpzaW1zIDwtIGFzLm1hdHJpeChmaXRfZmFrZSkKcHJlZGljdGVkX2Zha2UgPC0gY29sTWVhbnMoc2ltc1ssMV0gKyBzaW1zWywyXSAlKiUgdChpbnRyb2NsYXNzJG1pZHRlcm0pKQpgYGAKCm9yIHdpdGggYnVpbHQtaW4gZnVuY3Rpb24KCmBgYHtyIH0KcHJlZGljdGVkX2Zha2UgPC0gcHJlZGljdChmaXRfZmFrZSkKcmVzaWRfZmFrZSA8LSBpbnRyb2NsYXNzJGZpbmFsX2Zha2UgLSBwcmVkaWN0ZWRfZmFrZQpgYGAKCiMjIyMgUGxvdCByZXNpZHVhbHMgdnMgcHJlZGljdGVkCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBvc3RzY3JpcHQocm9vdCgiSW50cm9jbGFzcy9maWdzIiwiZmFrZXJlc2lkMmEucHMiKSwgaGVpZ2h0PTMuOCwgd2lkdGg9NC41LCBjb2xvcm1vZGVsPSJncmF5IikKYGBgCmBgYHtyIH0KcGxvdChwcmVkaWN0ZWRfZmFrZSwgcmVzaWRfZmFrZSwgeGxhYj0icHJlZGljdGVkIHZhbHVlIiwgeWxhYj0icmVzaWR1YWwiLCBtYWluPSJGYWtlIGRhdGE6ICByZXNpZHMgdnMuXCBwcmVkaWN0ZWQiLCBtZ3A9YygxLjUsLjUsMCksIHBjaD0yMCwgeWF4dD0ibiIpCmF4aXMoMiwgc2VxKC00MCw0MCwyMCksIG1ncD1jKDEuNSwuNSwwKSkKYWJsaW5lKDAsIDAsIGNvbD0iZ3JheSIsIGx3ZD0uNSkKYGBgCmBgYHtyIGV2YWw9RkFMU0UsIGluY2x1ZGU9RkFMU0V9CmlmIChzYXZlZmlncykgZGV2Lm9mZigpCmBgYAoKIyMjIyBQbG90IHJlc2lkdWFscyB2cyBvYnNlcnZlZAoKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KaWYgKHNhdmVmaWdzKSBwb3N0c2NyaXB0KHJvb3QoIkludHJvY2xhc3MvZmlncyIsImZha2VyZXNpZDJiLnBzIiksIGhlaWdodD0zLjgsIHdpZHRoPTQuNSwgY29sb3Jtb2RlbD0iZ3JheSIpCmBgYApgYGB7ciB9CnBsb3QoaW50cm9jbGFzcyRmaW5hbF9mYWtlLCByZXNpZF9mYWtlLCB4bGFiPSJvYnNlcnZlZCB2YWx1ZSIsIHlsYWI9InJlc2lkdWFsIiwgbWFpbj0iRmFrZSBkYXRhOiAgcmVzaWRzIHZzLlwgb2JzZXJ2ZWQiLCBtZ3A9YygxLjUsLjUsMCksIHBjaD0yMCwgeWF4dD0ibiIpCmF4aXMoMiwgc2VxKC00MCw0MCwyMCksIG1ncD1jKDEuNSwuNSwwKSkKYWJsaW5lKDAsIDAsIGNvbD0iZ3JheSIsIGx3ZD0uNSkKYGBgCmBgYHtyIGV2YWw9RkFMU0UsIGluY2x1ZGU9RkFMU0V9CmlmIChzYXZlZmlncykgZGV2Lm9mZigpCmBgYAoK