# Project setup: locate the project root, configure CmdStanR output and
# parallelism, and load the packages used below.
library(rprojroot)
# Build a path helper anchored at the directory containing the root marker file.
root <- has_file(".Bayesian-Workflow-root")$make_fix_file()
library(cmdstanr)
# Fixed CmdStanR output directory so that Quarto caching works
dir.create(root("dogs", "stan_output"), showWarnings = FALSE)
options(cmdstanr_output_dir = root("dogs", "stan_output"))
# Run the four MCMC chains in parallel.
options(mc.cores = 4)
library(posterior)  # draws containers (rvars, subset_draws)
library(MASS)       # mvrnorm
library(arm)        # invlogit
set.seed(123)
Posterior predictive checking: Stochastic learning in dogs
This notebook includes the CmdStanR code for the Bayesian Workflow book Chapter 21 Posterior predictive checking: Stochastic learning in dogs.
1 Introduction
We analyse the stochastic learning in dogs data of Bush and Mosteller (1955).
Load packages
2 Data
# Read the dog learning data, skipping the two header lines; columns 2..26
# hold the 25 trials per dog. Code a shock ("S") as 1, anything else as 0;
# ifelse() on the matrix preserves its dimensions.
dogs <- read.table(root("dogs", "data", "dogs.dat"), skip = 2)
shock <- ifelse(as.matrix(dogs[, 2:26]) == "S", 1, 0)
dogs_data <- list(y = shock, J = nrow(shock), T = ncol(shock))
3 Models
dogs_0 <- cmdstan_model(root("dogs", "dogs_0.stan"))
fit_0 <- dogs_0$sample(data = dogs_data, refresh = 0)
print(fit_0)
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
lp__ -286.85 -286.54 0.99 0.73 -288.84 -285.90 1.00 1496 2044
alpha 2.37 2.36 0.22 0.22 2.00 2.73 1.00 997 1285
beta -0.30 -0.30 0.02 0.02 -0.33 -0.26 1.00 990 1341
p[1,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
p[2,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
p[3,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
p[4,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
p[5,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
p[6,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
p[7,1] 0.89 0.89 0.02 0.02 0.85 0.92 1.00 1021 1380
# showing 10 of 1503 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
dogs_1 <- cmdstan_model(root("dogs", "dogs_1.stan"))
fit_1 <- dogs_1$sample(data = dogs_data, refresh = 0)
print(fit_1)
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
lp__ -289.05 -288.76 0.74 0.31 -290.49 -288.53 1.00 1943 2814
a 0.87 0.87 0.01 0.01 0.86 0.88 1.00 1483 1885
p[1,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[2,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[3,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[4,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[5,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[6,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[7,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
p[8,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
# showing 10 of 1502 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
dogs_2 <- cmdstan_model(root("dogs", "dogs_2.stan"))
fit_2 <- dogs_2$sample(data = dogs_data, refresh = 0)
print(fit_2)
Warning: NAs introduced by coercion
Warning: NAs introduced by coercion
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
lp__ -279.42 -279.13 0.99 0.71 -281.34 -278.48 1.00 1582 1738
a 0.92 0.92 0.01 0.01 0.90 0.94 1.00 1545 2097
b 0.79 0.79 0.02 0.02 0.76 0.82 1.00 1498 1822
y_rep[1,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
y_rep[2,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
y_rep[3,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
y_rep[4,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
y_rep[5,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
y_rep[6,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
y_rep[7,1] 1.00 1.00 0.00 0.00 1.00 1.00 NA NA NA
# showing 10 of 753 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
dogs_3 <- cmdstan_model(root("dogs", "dogs_3.stan"))
fit_3 <- dogs_3$sample(data = dogs_data, refresh = 0)
print(fit_3, variables = c("mu_logit_a", "sigma_logit_a"))
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
mu_logit_a 1.87 1.87 0.08 0.08 1.73 2.01 1.00 2018 2246
sigma_logit_a 0.32 0.31 0.10 0.09 0.16 0.48 1.00 434 308
dogs_4 <- cmdstan_model(root("dogs", "dogs_4.stan"))
fit_4 <- dogs_4$sample(data = dogs_data, refresh = 0)
print(fit_4, variables = c("mu_logit_ab", "Sigma_logit_ab"))
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
mu_logit_ab[1] 2.42 2.41 0.22 0.21 2.07 2.79 1.00 1705 2497
mu_logit_ab[2] 1.32 1.32 0.23 0.22 0.96 1.70 1.01 1883 2570
Sigma_logit_ab[1,1] 0.51 0.43 0.37 0.31 0.09 1.21 1.02 385 636
Sigma_logit_ab[2,1] -0.36 -0.28 0.37 0.28 -1.04 0.05 1.02 375 853
Sigma_logit_ab[1,2] -0.36 -0.28 0.37 0.28 -1.04 0.05 1.02 375 853
Sigma_logit_ab[2,2] 0.73 0.57 0.60 0.41 0.13 1.82 1.03 398 670
dogs_5 <- cmdstan_model(root("dogs", "dogs_5.stan"))
fit_5 <- dogs_5$sample(data = dogs_data, refresh = 0)
print(fit_5, variables = c("mu_logit_ab", "sigma_logit_ab", "Omega_logit_ab[1,2]",
                           "a[1]", "b[1]"))
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
mu_logit_ab[1] 2.43 2.43 0.18 0.17 2.13 2.73 1.00 2471 2674
mu_logit_ab[2] 1.29 1.29 0.15 0.15 1.05 1.54 1.00 2435 2459
sigma_logit_ab[1] 0.31 0.29 0.19 0.21 0.03 0.65 1.00 1191 1615
sigma_logit_ab[2] 0.39 0.36 0.24 0.22 0.05 0.82 1.00 927 1125
Omega_logit_ab[1,2] -0.06 -0.09 0.40 0.43 -0.68 0.63 1.00 2052 2255
a[1] 0.90 0.91 0.05 0.03 0.81 0.95 1.00 2425 3085
b[1] 0.74 0.76 0.08 0.06 0.59 0.83 1.00 2385 2951
4 Plots
empty_plot <- function(a = "") {
  # Draw a frame with no box, no axes, and no data, then print the label
  # `a` centred in it. Used as a placeholder / caption panel in plot grids.
  plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
  text(0, 0, labels = a, cex = 0.8)
}
plot_dogs <- function(y, ...) {
  # Display a binary trial matrix (rows = dogs, columns = trials) as an
  # image, with rows reordered so the dogs whose last shock came latest are
  # drawn first. `...` is forwarded to image() (e.g. main =, cex.main =).
  J <- nrow(y)
  # Trial index of each dog's last shock; which() replaces the original
  # (1:T)[y[j, ] == 1] form. A dog with no shocks yields -Inf (with a
  # warning), exactly as before, and still sorts to one end.
  max_y_times <- vapply(
    seq_len(J),
    function(j) max(which(y[j, ] == 1)),
    numeric(1)
  )
  # rev(order(...)) rather than order(..., decreasing = TRUE) to keep the
  # original tie-breaking, so the plot is unchanged.
  y_ordered <- y[rev(order(max_y_times)), ]
  image(t(y_ordered), bty = "n", xaxt = "n", yaxt = "n", ...)
}
plot_ppc <- function(fit, label) {
  # Draw a caption panel, then six posterior predictive replicated data
  # sets (two random draws from each of three chains) for the given fit.
  post <- as_draws_rvars(fit$draws())
  empty_plot(label)
  for (chain_id in 1:3) {
    for (draw_id in sample(1000, 2)) {
      # NOTE(review): sum() collapses y_rep to a single scalar, yet
      # plot_dogs() expects a J x T matrix -- this looks like a text
      # extraction artefact; confirm against the original notebook source.
      y_rep_draw <- sum(subset_draws(post, iteration = draw_id, chain = chain_id)$y_rep)
      plot_dogs(y_rep_draw)
    }
  }
}

par(mfrow = c(7, 7), mar = c(0.5, 0.5, 0.5, 0.5))
# Grid of raster plots: the real data in the top-left panel, then posterior
# predictive simulations (six replicates per model) from each model.
empty_plot("Real dogs")
plot_dogs(shock)
# Pad the remainder of the first row with blank panels.
for (k in 1:5) {
  empty_plot()
}
plot_ppc(fit_0, "PPsims from M0:\nlogit model")
plot_ppc(fit_1, "PPsims from M1:\n1-parameter\nlog model")
plot_ppc(fit_2, "PPsims from M2:\n2-parameter\nlog model")
plot_ppc(fit_3, "PPsims from M3:\nhier 1-par\nlog model")
plot_ppc(fit_4, "PPsims from M4:\nhier 2-par\nlog model")
plot_ppc(fit_5, "PPsims from M5:\nhier 2-par\nlog model\nwith prior")

post <- as_draws_rvars(fit_5$draws())
# 2 x 5 grid of (a, b) scatter plots: five random posterior draws from each
# of two chains of fit_5.
par(mfrow = c(2, 5), pty = "s",
    mar = c(2.5, 2.5, 0.5, 0.5), mgp = c(1.5, 0.2, 0),
    tck = -0.02, oma = c(0, 0, 1, 0))
for (k in 1:2) {
  index <- sample(1000, 5)
  for (i in 1:5) {
    # NOTE(review): sum() collapses the rvar over all dogs to one scalar,
    # although the title speaks of the parameters of the 30 dogs -- this
    # looks like a text extraction artefact; confirm against the original
    # notebook source.
    a_sim <- sum(subset_draws(post, iter = index[i], chain = k)$a)
    b_sim <- sum(subset_draws(post, iter = index[i], chain = k)$b)
    plot(c(0.55, 1), c(0.55, 1),
         xlab = if (k == 2) "a" else "", ylab = if (i == 1) "b" else "",
         xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n", type = "n")
    # x tick labels only on the bottom row, blank ticks elsewhere.
    if (k == 2) {
      axis(1, c(0.6, 0.8, 1), c("0.6", "0.8", "1"))
    } else {
      axis(1, c(0.6, 0.8, 1), c("", "", ""))
    }
    # y tick labels only in the first column. Fix: the original drew the
    # blank tick labels on side 1 (x-axis) here; side 2 matches the
    # labelled branch above.
    if (i == 1) {
      axis(2, c(0.6, 0.8, 1), c("0.6", "0.8", "1"))
    } else {
      axis(2, c(0.6, 0.8, 1), c("", "", ""))
    }
    abline(0, 1, lwd = .5, col = "gray")
    points(a_sim, b_sim, pch = 20, cex = .6)
  }
}
# Hoisted out of the inner loop: with outer = TRUE this overall title only
# needs to be drawn once, not once per panel.
mtext("10 posterior simulations of the parameters of the 30 dogs",
      3, 0, cex = 0.8, outer = TRUE)

post <- as_draws_rvars(fit_5$draws())
# Scatter plot of the per-dog posterior medians of a and b from the model
# fitted to the real data; the gray line marks a = b.
par(pty = "s", mar = c(3, 3.5, 2, 1), mgp = c(2, .5, 0), tck = -.01)
plot(median(post$a), median(post$b),
     xlim = c(0.55, 1), ylim = c(0.55, 1), xaxs = "i", yaxs = "i",
     xlab = expression(hat(a)), ylab = expression(hat(b)), pch = 20, cex = 0.6,
     main = "Posterior medians from fitted model", cex.main = 0.9)
abline(0, 1, lwd = 0.5, col = "gray")
# Simulate a new population of dogs from assumed hyperparameters, for use
# in simulation-based calibration checking below.
# Hyperparameters on the logit scale.
new_dogs_mu_logit_ab <- c(2.4, 1.3)
new_dogs_sigma_ab <- c(0.32, 0.40)
new_dogs_rho_ab <- 0
# Covariance matrix: Sigma = D %*% Omega %*% D with D = diag(sigma).
new_dogs_Sigma_ab <-
  diag(new_dogs_sigma_ab) %*%
  rbind(c(1, new_dogs_rho_ab), c(new_dogs_rho_ab, 1)) %*%
  diag(new_dogs_sigma_ab)
J <- 30
# Draw each dog's (a, b) on the logit scale and transform to (0, 1).
new_dogs_ab <- invlogit(mvrnorm(J, new_dogs_mu_logit_ab, new_dogs_Sigma_ab))
a <- new_dogs_ab[, 1]
b <- new_dogs_ab[, 2]
T <- 25
new_dogs <- array(NA, c(J, T))
for (j in seq_len(J)) {
  prev_shock <- 0
  prev_avoid <- 0
  # Every dog is shocked on its first trial.
  new_dogs[j, 1] <- 1
  for (t in 2:T) {
    prev_shock <- prev_shock + new_dogs[j, t - 1]
    prev_avoid <- prev_avoid + 1 - new_dogs[j, t - 1]
    # P(shock at trial t) decays multiplicatively with past shocks (a) and
    # past avoidances (b).
    p <- a[j]^prev_shock * b[j]^prev_avoid
    new_dogs[j, t] <- rbinom(1, 1, p)
  }
}
new_dogs_data <- list(y = new_dogs, J = J, T = T)
new_fit_5 <- dogs_5$sample(data = new_dogs_data, refresh = 0)
print(new_fit_5, variables = c("mu_logit_ab", "sigma_logit_ab", "Omega_logit_ab[1,2]",
                               "a[1]", "a[2]", "b[1]", "b[2]"))
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
mu_logit_ab[1] 2.34 2.34 0.17 0.16 2.08 2.61 1.00 2842 2652
mu_logit_ab[2] 1.38 1.38 0.15 0.14 1.13 1.63 1.00 2662 2386
sigma_logit_ab[1] 0.21 0.18 0.15 0.16 0.02 0.51 1.00 1598 1818
sigma_logit_ab[2] 0.36 0.34 0.21 0.21 0.04 0.72 1.00 1052 1508
Omega_logit_ab[1,2] 0.05 0.04 0.37 0.40 -0.56 0.69 1.00 2789 2752
a[1] 0.90 0.91 0.04 0.02 0.83 0.94 1.00 2950 3015
a[2] 0.90 0.91 0.03 0.02 0.85 0.94 1.00 3521 3015
b[1] 0.78 0.79 0.05 0.04 0.67 0.85 1.00 3477 3293
b[2] 0.77 0.78 0.07 0.05 0.63 0.85 1.00 2531 3219
# Scatter plot of the true simulated (a, b) parameters, then the simulated
# data set itself, then extract rvars from the fit to the simulated data.
par(pty = "s", mar = c(3, 3.5, 2, 1), mgp = c(2, 0.5, 0), tck = -0.01)
plot(a, b, xlim = c(0.55, 1), ylim = c(0.55, 1),
     xaxs = "i", yaxs = "i", xlab = "a", ylab = "b", pch = 20, cex = 0.6,
     main = "Simulated parameters", cex.main = 0.9)
abline(0, 1, lwd = 0.5, col = "gray")

par(pty = "m", mar = c(1, 2, 2, 1))
plot_dogs(new_dogs, main = "Simulated data", cex.main = 0.9)

post <- as_draws_rvars(new_fit_5$draws())
# Calibration check: for each dog, plot the posterior median and 50%
# central interval of a (blue) and b (red) against the true simulated
# value; well-calibrated inference clusters around the y = x line.
par(pty = "s", mar = c(3, 3.5, 2, 1), mgp = c(2, 0.5, 0), tck = -0.01)
plot(0, 0, xlim = c(0.55, 1), ylim = c(0.55, 1),
     xlab = "Posterior inference", ylab = "True parameter value",
     xaxs = "i", yaxs = "i", pch = 20, cex = 0.6,
     main = "Calibration check of posterior intervals", cex.main = 0.9)
abline(0, 1, lwd = 0.5, col = "gray")
for (j in seq_len(J)) {
  points(median(post$a[j]), a[j], pch = 20, cex = 0.6, col = "blue")
  lines(quantile(post$a[j], c(0.25, 0.75)), rep(a[j], 2), lwd = 0.5, col = "blue")
  points(median(post$b[j]), b[j], pch = 20, cex = 0.6, col = "red")
  lines(quantile(post$b[j], c(0.25, 0.75)), rep(b[j], 2), lwd = 0.5, col = "red")
}
# Repeat the simulation with a larger population (J = 300 dogs) for a
# sharper check of the hyperparameter recovery.
J <- 300
new_dogs_ab <- invlogit(mvrnorm(J, new_dogs_mu_logit_ab, new_dogs_Sigma_ab))
a <- new_dogs_ab[, 1]
b <- new_dogs_ab[, 2]
T <- 25
new_dogs <- array(NA, c(J, T))
for (j in seq_len(J)) {
  prev_shock <- 0
  prev_avoid <- 0
  # Every dog is shocked on its first trial.
  new_dogs[j, 1] <- 1
  for (t in 2:T) {
    prev_shock <- prev_shock + new_dogs[j, t - 1]
    prev_avoid <- prev_avoid + 1 - new_dogs[j, t - 1]
    p <- a[j]^prev_shock * b[j]^prev_avoid
    new_dogs[j, t] <- rbinom(1, 1, p)
  }
}
new_dogs_data <- list(y = new_dogs, J = J, T = T)
new_fit_5 <- dogs_5$sample(data = new_dogs_data, refresh = 0)
print(new_fit_5, variables = c("mu_logit_ab", "sigma_logit_ab", "Omega_logit_ab[1,2]",
                               "a[1]", "a[2]", "b[1]", "b[2]"))
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
mu_logit_ab[1] 2.41 2.41 0.06 0.06 2.31 2.51 1.01 1731 2840
mu_logit_ab[2] 1.22 1.22 0.05 0.05 1.14 1.31 1.01 2178 2878
sigma_logit_ab[1] 0.35 0.35 0.10 0.09 0.18 0.49 1.02 480 581
sigma_logit_ab[2] 0.50 0.50 0.09 0.09 0.36 0.65 1.00 1277 2115
Omega_logit_ab[1,2] -0.22 -0.25 0.19 0.16 -0.47 0.13 1.00 861 1337
a[1] 0.91 0.91 0.03 0.03 0.85 0.95 1.00 4165 2816
a[2] 0.94 0.94 0.02 0.02 0.90 0.96 1.00 3946 2612
b[1] 0.85 0.85 0.04 0.04 0.78 0.92 1.00 4318 2290
b[2] 0.79 0.80 0.06 0.06 0.68 0.88 1.00 5623 2970
# Simulate longer series (T = 50 trials) for the same per-dog parameters,
# to check how much the extra trials tighten the posterior.
T <- 50
new_dogs <- array(NA, c(J, T))
for (j in seq_len(J)) {
  prev_shock <- 0
  prev_avoid <- 0
  # Every dog is shocked on its first trial.
  new_dogs[j, 1] <- 1
  for (t in 2:T) {
    prev_shock <- prev_shock + new_dogs[j, t - 1]
    prev_avoid <- prev_avoid + 1 - new_dogs[j, t - 1]
    p <- a[j]^prev_shock * b[j]^prev_avoid
    new_dogs[j, t] <- rbinom(1, 1, p)
  }
}
new_dogs_data <- list(y = new_dogs, J = J, T = T)
new_fit_5 <- dogs_5$sample(data = new_dogs_data, refresh = 0)
print(new_fit_5, variables = c("mu_logit_ab", "sigma_logit_ab", "Omega_logit_ab[1,2]",
                               "a[1]", "a[2]", "b[1]", "b[2]"))
 variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
mu_logit_ab[1] 2.46 2.46 0.06 0.06 2.36 2.56 1.00 1858 2542
mu_logit_ab[2] 1.22 1.22 0.05 0.05 1.14 1.30 1.00 1944 2283
sigma_logit_ab[1] 0.37 0.37 0.09 0.08 0.22 0.50 1.01 533 541
sigma_logit_ab[2] 0.48 0.48 0.07 0.07 0.38 0.60 1.00 1112 1741
Omega_logit_ab[1,2] -0.21 -0.23 0.17 0.16 -0.45 0.11 1.02 601 974
a[1] 0.94 0.94 0.02 0.02 0.91 0.96 1.00 3808 2403
a[2] 0.91 0.92 0.03 0.03 0.85 0.95 1.00 3922 2734
b[1] 0.82 0.82 0.05 0.05 0.73 0.89 1.00 4146 2776
b[2] 0.71 0.72 0.08 0.08 0.57 0.83 1.00 4294 3248
# Show the longer (50-trial) simulated data, then extract rvars from the
# corresponding fit.
par(pty = "m", mar = c(1, 2, 2, 1))
plot_dogs(new_dogs, main = "Simulated data: 50 trials", cex.main = 0.9)
post <- as_draws_rvars(new_fit_5$draws())
par(pty = "s", mar = c(3, 3.5, 2, 1), mgp = c(2, 0.5, 0), tck = -0.01)
# Calibration check for the 50-trial simulation: for each dog, plot the
# posterior median and 50% central interval of a (blue) and b (red)
# against the true simulated value.
plot(0, 0, xlim = c(0.55, 1), ylim = c(0.55, 1),
     xlab = "Posterior inference", ylab = "True parameter value",
     xaxs = "i", yaxs = "i", pch = 20, cex = 0.6,
     main = "Calibration check based on 50 trials", cex.main = 0.9)
# Reference line: perfect recovery would put every point on y = x.
abline(0, 1, lwd = 0.5, col = "gray")
for (j in 1:J){
  points(median(post$a[j]), a[j], pch = 20, cex = 0.6, col = "blue")
  # 50% central interval of a[j]
  lines(quantile(post$a[j], c(0.25, 0.75)), rep(a[j], 2), lwd = 0.5, col = "blue")
  points(median(post$b[j]), b[j], pch = 20, cex = 0.6, col = "red")
  # 50% central interval of b[j]
  lines(quantile(post$b[j], c(0.25, 0.75)), rep(b[j], 2), lwd = 0.5, col = "red")
}
References
Bush, R. R., and F. Mosteller. 1955. Stochastic Models for Learning. Wiley.
Licenses
- Code © 2023–2025, Andrew Gelman, licensed under BSD-3.
- Text © 2023–2025, Andrew Gelman, licensed under CC-BY-NC 4.0.