Modeling performance on a multiple choice exam

Author

Andrew Gelman

Published

2022-08-22

Modified

2026-04-07

This notebook includes the code for the Bayesian Workflow book Chapter 4 Introduction to workflow: Modeling performance on a multiple choice exam.

1 Introduction

We demonstrate with an example from Section 4.15 of Gelman and Vehtari (2024), assessing the grading of a multiple-choice test. We analyze data from a 24-question final exam from a class of 32 students, where our applied goal is to check that the individual test questions are doing a good job at discriminating between poorly- and well-performing students. No external data are available on the students, so we assess their abilities using their total score on the exam.

Each item is a multiple-choice question with 4 possible answers and is scored either 1 (correct) or 0 (incorrect). The total scores across the 24-question exam range from 12 to 21, with an average of 16. Across the questions, the hardest question has 4 of the 32 students answering it correctly, while all students manage to answer the easiest question correctly.

Load packages

# Locate files relative to the repository root marker file
library(rprojroot)
root <- has_file(".Bayesian-Workflow-root")$make_fix_file()
library(cmdstanr)
options(mc.cores = 4)  # run the 4 MCMC chains in parallel
library(posterior)
library(arm)  # provides invlogit() used below
set.seed(123)  # reproducible jittering and simulations
# Bring in plotting functions from a separate file
source(root("multiple_choice", "plot_functions.R"))

# Print Stan source code either as a fenced ```stan block (so Pandoc/Quarto
# applies syntax highlighting when knitting a chunk with results = "asis")
# or as plain text when running interactively.
#
# code: character vector of Stan source lines, e.g. model$code().
print_stan_code <- function(code) {
  # Use scalar, short-circuiting && here: when not knitting, the right-hand
  # knitr call is never evaluated, so this also works when knitr is not
  # installed.  The elementwise `&` would evaluate both sides.
  if (isTRUE(getOption("knitr.in.progress")) &&
        identical(knitr::opts_current$get("results"), "asis")) {
    # In render: emit as-is so Pandoc/Quarto does syntax highlighting
    block <- paste0("```stan", "\n", paste(code, collapse = "\n"), "\n", "```")
    knitr::asis_output(block)
  } else {
    writeLines(code)
  }
}

2 Stan models

Compile all Stan programs to use throughout the file

# Compile every Stan program up front so later sections can sample immediately
logit_0 <- cmdstan_model(root("multiple_choice", "logit_0.stan"))
logit_prior <- cmdstan_model(root("multiple_choice", "logit_prior.stan"))
logit_guessing <- cmdstan_model(root("multiple_choice", "logit_guessing.stan"))
logit_guessing_uncentered <- cmdstan_model(root("multiple_choice", "logit_guessing_uncentered.stan"))
logit_guessing_multilevel <- cmdstan_model(root("multiple_choice", "logit_guessing_multilevel.stan"))
logit_guessing_uncentered_multilevel <- cmdstan_model(root("multiple_choice", "logit_guessing_uncentered_multilevel.stan"))
logit_guessing_multilevel_bivariate <- cmdstan_model(root("multiple_choice", "logit_guessing_multilevel_bivariate.stan"))
logit_guessing_multilevel_bivariate_cholesky <- cmdstan_model(root("multiple_choice", "logit_guessing_multilevel_bivariate_cholesky.stan"))
irt_guessing <- cmdstan_model(root("multiple_choice", "irt_guessing.stan"))
irt_guessing_discrimination <- cmdstan_model(root("multiple_choice", "irt_guessing_discrimination.stan"))

3 Data

Read in data and construct score for each student

# Read raw responses (one row per student, one column per item) and the
# one-row answer key (one column per item).
responses <- read.csv(root("multiple_choice", "data", "final_exam_responses.csv"))
answers <- read.csv(root("multiple_choice", "data", "final_exam_answers.csv"))
J <- nrow(responses)  # number of students
K <- ncol(responses)  # number of items
# Indicator matrix: correct[j, k] = 1 if student j answered item k correctly.
# seq_len(K) is the safe idiom (1:K misbehaves if K were ever 0).
correct <- array(NA, c(J, K))
for (k in seq_len(K)){
  correct[,k] <- ifelse(responses[,k] == as.character(answers[k]), 1, 0)
}
score <- rowSums(correct)  # total score per student
item <- colSums(correct)   # number of students answering each item correctly
summary(score)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  12.00   14.75   16.00   16.09   17.00   21.00 
summary(item)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   4.00   15.25   24.00   21.46   27.25   32.00 
score_jitt <- score + jitter(rep(0, J), amount = 0.3)  # jitter for plotting only
score_adj <- (score - mean(score)) / sd(score)  # standardized score
score_adj_jitt <- (score_jitt - mean(score)) / sd(score)
data <- list(J = J, x = score, y = correct)

# Item labels A, B, C, ...  Index by the number of items K (24), not the
# number of students J (32): LETTERS[1:J] would yield NA labels past Z.
item_id_0 <- LETTERS[seq_len(K)]  # Only works here because K is no more than 26!

4 Simple models

4.1 Base model logit_0

print_stan_code(logit_0$code())
data {
  int J;
  array[J] int<lower=0, upper=1> y;
  vector[J] x;
}
parameters {
  real a, b;
}
model {
  y ~ bernoulli_logit(a + b*x);
}
# Fit the flat-prior logit model to item A only (first column of y) and
# plot the fitted curve over the jittered raw scores
plot_logit(
  "Fit to item A on exam",
  "Score on exam",
  logit_0,
  list(J = data$J, x = data$x, y = data$y[, 1]),
  score_jitt,
  guessprob = 0
)
 variable   mean median   sd  mad     q5    q95 rhat ess_bulk ess_tail
     lp__ -14.30 -13.92 1.20 0.78 -16.55 -13.23 1.01      681      646
     a    -11.85 -11.26 5.19 4.78 -21.06  -4.51 1.01      344      370
     b      0.85   0.81 0.35 0.32   0.36   1.47 1.01      341      367
Figure 1

4.2 Add priors

print_stan_code(logit_prior$code())
data {
  int J;
  array[J] int<lower=0, upper=1> y;
  vector[J] x;
  real mu_a, mu_b;
  real<lower=0> sigma_a, sigma_b;
}
transformed data {
  vector[J] x_adj = (x - mean(x))/sd(x);
}
parameters {
  real a, b;
}
model {
  a ~ normal(mu_a, sigma_a);
  b ~ normal(mu_b, sigma_b);
  y ~ bernoulli_logit(a + b*x_adj);
}
# Refit item A with the standardized predictor and weakly informative
# normal(0, 5) priors on intercept and slope
plot_logit(
  "Fit to item A:  rescaled predictor and weakly informative prior",
  "Standardized exam score",
  logit_prior,
  list(J = data$J, x = data$x, y = data$y[, 1], 
       mu_a = 0, sigma_a = 5, mu_b = 0, sigma_b = 5),
  score_adj_jitt,
  guessprob = 0
)
 variable   mean median   sd  mad     q5    q95 rhat ess_bulk ess_tail
     lp__ -14.35 -14.03 1.05 0.76 -16.39 -13.34 1.00     1449     1767
     a      1.76   1.72 0.62 0.62   0.83   2.88 1.00     1635     1542
     b      1.87   1.82 0.71 0.70   0.79   3.09 1.00     1587     1697
Figure 2
# Fit the same model separately to all K items and display them in a grid,
# labeled A, B, C, ... via item_id_0
plot_logit_grid(
  "Rescaled predictor and weakly informative prior",
  "Standardized exam score",
  logit_prior,
  c(data, list(mu_a = 0, sigma_a = 5, mu_b = 0, sigma_b = 5)),
  score_adj_jitt,
  item_id_0,
  guessprob = 0
)
Figure 3
# Zoom in on item G (column 7), which shows a near-flat fitted slope --
# a clue to the data problem fixed in the next section
plot_logit(
  "Fit to item G: rescaled predictor and weakly informative prior",
  "Standardized exam score",
  logit_prior,
  list(J = data$J, x = data$x, y = data$y[, 7], 
       mu_a = 0, sigma_a = 5, mu_b = 0, sigma_b = 5),
  score_adj_jitt,
  guessprob = 0
)
 variable  mean median   sd  mad    q5   q95 rhat ess_bulk ess_tail
     lp__ -1.74  -1.42 1.01 0.73 -3.85 -0.77 1.00     1010     1691
     a     7.68   7.27 2.72 2.68  3.96 12.73 1.00     1152     1717
     b     0.08   0.02 2.09 1.96 -3.31  3.64 1.00      824     1142
Figure 4

4.3 Fix the data problems

# Correct the answer key for three mis-keyed items, then re-derive the
# correctness matrix, scores, and standardized/jittered versions
answers[c(5, 14, 17)] <- c("d", "d", "c")
correct <- array(NA, c(J,K))
for (k in seq_len(K)){
  correct[,k] <- ifelse(responses[,k] == as.character(answers[k]), 1, 0)
}
score <- rowSums(correct)
score_jitt <- score + jitter(rep(0, J), amount = 0.3)
score_adj <- (score - mean(score)) / sd(score)
score_adj_jitt <- (score_jitt - mean(score)) / sd(score)
data <- list(J = J, x = score, y = correct)
# Order items by difficulty for plotting.  Spell out ties.method explicitly
# instead of relying on partial argument matching (`ties = "first"`).
item_id <- rank(colSums(correct), ties.method = "first")
# Re-plot the per-item fits with the corrected answer key; items are now
# ordered by difficulty (item_id)
plot_logit_grid(
  "After fixing the data problem",
  "Standardized exam score",
  logit_prior,
  c(data, list(mu_a = 0, sigma_a = 5, mu_b = 0, sigma_b = 5)),
  score_adj_jitt,
  item_id,
  guessprob = 0
)
Figure 5

4.4 Allow for guessing

print_stan_code(logit_guessing$code())
data {
  int J;
  array[J] int<lower=0, upper=1> y;
  vector[J] x;
  real mu_a, mu_b;
  real<lower=0> sigma_a, sigma_b;
}
transformed data {
  vector[J] x_adj = (x - mean(x))/sd(x);
}
parameters {
  real a, b;
}
model {
  a ~ normal(mu_a, sigma_a);
  b ~ normal(mu_b, sigma_b);
  y ~ bernoulli(0.25 + 0.75*inv_logit(a + b*x_adj));
}
# Refit all items with the guessing model: success probability floored at
# 0.25 (random guessing among 4 choices)
plot_logit_grid(
  "Probabilities constrained to range from 0.25 to 1",
  "Standardized exam score",
  logit_guessing,
  c(data, list(mu_a = 0, sigma_a = 5, mu_b = 0, sigma_b = 5)),
  score_adj_jitt,
  item_id,
  guessprob = 0.25
)
Figure 6

5 Multilevel models

In preparation for multilevel model, create long dataset

# Reshape the J x K correctness matrix into one row per (student, item)
# observation.  The ordering matches the original nested loop over students
# (outer) and items (inner): item index varies fastest.
N <- J * K
student <- rep(seq_len(J), each = K)   # 1,1,...,1, 2,2,...,2, ...
item <- rep(seq_len(K), times = J)     # 1,2,...,K, 1,2,...,K, ...
y <- as.vector(t(correct))             # row-major flattening of correct
longdata <- list(
  N = N, J = J, K = K,
  student = student,
  item = item,
  y = y,
  x = score
)

5.1 Multilevel model

print_stan_code(logit_guessing_multilevel$code())
data {
  int N;   // number of observations
  int J;   // number of students
  int K;   // number of items on exam
  array[N] int<lower=0, upper=J> student;
  array[N] int<lower=0, upper=K> item;
  array[N] int<lower=0, upper=1> y;
  vector[J] x;
  real mu_mu_a, mu_mu_b;
  real<lower=0> sigma_mu_a, sigma_mu_b, mu_sigma_a, mu_sigma_b;
}
transformed data {
  vector[J] x_adj = (x - mean(x))/sd(x);
}
parameters {
  real mu_a, mu_b;
  real<lower=0> sigma_a, sigma_b;
  vector<offset=mu_a, multiplier=sigma_a>[K] a;
  vector<offset=mu_b, multiplier=sigma_b>[K] b;
}
model {
  a ~ normal(mu_a, sigma_a);
  b ~ normal(mu_b, sigma_b);
  mu_a ~ normal(mu_mu_a, sigma_mu_a);
  mu_b ~ normal(mu_mu_b, sigma_mu_b);
  sigma_a ~ exponential(1/mu_sigma_a);
  sigma_b ~ exponential(1/mu_sigma_b);
  y ~ bernoulli(0.25 + 0.75*inv_logit(a[item] + b[item] .* x_adj[student]));
}
# Fit the multilevel model that partially pools item intercepts and slopes;
# plot_logit_grid_2 returns the CmdStanR fit object for inspection below
fit_5 <- plot_logit_grid_2(
  "Multilevel model, partially pooling across the 24 exam questions",
  "Standardized exam score",
  logit_guessing_multilevel,
  c(longdata, list(
    mu_mu_a = 0, sigma_mu_a = 5, 
    mu_mu_b = 0, sigma_mu_b = 5,
    mu_sigma_a = 5, mu_sigma_b = 5
  )),
  score_adj_jitt,
  item_id,
  guessprob = 0.25
)

# Summarize only the population-level (hyper)parameters
print(fit_5, variables = c("mu_a", "sigma_a", "mu_b", "sigma_b"))
 variable mean median   sd  mad   q5  q95 rhat ess_bulk ess_tail
  mu_a    0.87   0.87 0.39 0.37 0.26 1.52 1.00      709     1123
  sigma_a 1.67   1.62 0.38 0.35 1.15 2.35 1.00     1306     2183
  mu_b    1.09   1.08 0.17 0.16 0.82 1.39 1.00     3529     1616
  sigma_b 0.25   0.21 0.20 0.19 0.02 0.62 1.00     1466     1733
Figure 7
Figure 8

5.2 Multilevel model with correlation

print_stan_code(logit_guessing_multilevel_bivariate$code())
data {
  int N;   // number of observations
  int J;   // number of students
  int K;   // number of items on exam
  array[N] int<lower=0, upper=J> student;
  array[N] int<lower=0, upper=K> item;
  array[N] int<lower=0, upper=1> y;
  vector[J] x;
  vector[2] mu_mu_ab;
  vector<lower=0>[2] sigma_mu_ab, mu_sigma_ab;
}
transformed data {
  vector[J] x_adj = (x - mean(x))/sd(x);
}
parameters {
  vector[2] mu_ab;
  vector<lower=0>[2] sigma_ab;
  array[K] vector[2] e_ab;
  corr_matrix[2] Omega_ab;
}
transformed parameters {
  vector[K] a;
  vector[K] b;
  for (k in 1:K) {
    a[k] = mu_ab[1] + sigma_ab[1] * e_ab[k][1]; 
    b[k] = mu_ab[2] + sigma_ab[2] * e_ab[k][2];
  }
}
model {
  e_ab ~ multi_normal([0,0], Omega_ab);
  mu_ab ~ normal(mu_mu_ab, sigma_mu_ab);
  sigma_ab ~ exponential(1/mu_sigma_ab);
  Omega_ab ~ lkj_corr(1);
  y ~ bernoulli(0.25 + 0.75*inv_logit(a[item] + b[item] .* x_adj[student]));
}
# Bivariate multilevel model: item intercepts and slopes get a correlated
# prior via a 2x2 correlation matrix Omega_ab
fit_6 <- plot_logit_grid_2(
  "Multilevel model with correlation",
  "Standardized exam score",
  logit_guessing_multilevel_bivariate,
  c(longdata, list(
    mu_mu_ab = c(0, 0),
    sigma_mu_ab = c(5, 10),
    mu_sigma_ab = c(5, 10)
  )),
  score_adj_jitt,
  item_id,
  guessprob = 0.25
)

# Population-level summaries including the intercept/slope correlation
print(fit_6, variables = c("mu_ab", "sigma_ab", "Omega_ab"))
      variable  mean median   sd  mad    q5  q95 rhat ess_bulk ess_tail
 mu_ab[1]       0.82   0.82 0.37 0.36  0.23 1.41 1.00      910     1303
 mu_ab[2]       1.09   1.08 0.18 0.18  0.82 1.40 1.00     2588     2160
 sigma_ab[1]    1.68   1.63 0.37 0.34  1.16 2.35 1.00     1205     1871
 sigma_ab[2]    0.29   0.25 0.21 0.20  0.03 0.68 1.00      868     1651
 Omega_ab[1,1]  1.00   1.00 0.00 0.00  1.00 1.00   NA       NA       NA
 Omega_ab[2,1] -0.35  -0.47 0.50 0.51 -0.95 0.65 1.01      216      340
 Omega_ab[1,2] -0.35  -0.47 0.50 0.51 -0.95 0.65 1.01      216      340
 Omega_ab[2,2]  1.00   1.00 0.00 0.00  1.00 1.00   NA       NA       NA
Figure 9
Figure 10

5.3 Multilevel model with correlation using Cholesky

print_stan_code(logit_guessing_multilevel_bivariate_cholesky$code())
data {
  int N;   // number of observations
  int J;   // number of students
  int K;   // number of items on exam
  array[N] int<lower=0, upper=J> student;
  array[N] int<lower=0, upper=K> item;
  array[N] int<lower=0, upper=1> y;
  vector[J] x;
  vector[2] mu_mu_ab;
  vector<lower=0>[2] sigma_mu_ab, mu_sigma_ab;
}
transformed data {
  vector[J] x_adj = (x - mean(x))/sd(x);
}
parameters {
  vector[2] mu_ab;
  vector<lower=0>[2] sigma_ab;
  array[K] vector[2] e_ab;
  cholesky_factor_corr[2] L_ab;
}
transformed parameters {
  vector[K] a;
  vector[K] b;
  for (k in 1:K) {
    a[k] = mu_ab[1] + sigma_ab[1] * e_ab[k][1]; 
    b[k] = mu_ab[2] + sigma_ab[2] * e_ab[k][2];
  }
}
model {
  e_ab ~ multi_normal_cholesky([0,0], L_ab);
  mu_ab ~ normal(mu_mu_ab, sigma_mu_ab);
  sigma_ab ~ exponential(1/mu_sigma_ab);
  L_ab ~ lkj_corr_cholesky(1);
  y ~ bernoulli(0.25 + 0.75*inv_logit(a[item] + b[item] .* x_adj[student]));
}
generated quantities {
  corr_matrix[2] Omega_ab = multiply_lower_tri_self_transpose(L_ab);
}
# Same bivariate model, reparameterized with a Cholesky factor of the
# correlation matrix for better sampling efficiency
fit_7 <- plot_logit_grid_2(
  "Multilevel model with correlation:  Cholesky parameterization",
  "Standardized exam score",
  logit_guessing_multilevel_bivariate_cholesky,
  c(longdata, list(
    mu_mu_ab = c(0, 0),
    sigma_mu_ab = c(5, 10),
    mu_sigma_ab = c(5, 10)
  )),
  score_adj_jitt,
  item_id,
  guessprob = 0.25
)

# Omega_ab is reconstructed in generated quantities from the Cholesky factor
print(fit_7, variables = c("mu_ab", "sigma_ab", "Omega_ab"))
      variable  mean median   sd  mad    q5  q95 rhat ess_bulk ess_tail
 mu_ab[1]       0.81   0.80 0.38 0.36  0.18 1.44 1.01      707     1132
 mu_ab[2]       1.08   1.08 0.18 0.17  0.81 1.39 1.00     1740     1673
 sigma_ab[1]    1.69   1.64 0.36 0.34  1.19 2.35 1.00     1118     1711
 sigma_ab[2]    0.30   0.26 0.21 0.22  0.03 0.70 1.00      901     1844
 Omega_ab[1,1]  1.00   1.00 0.00 0.00  1.00 1.00   NA       NA       NA
 Omega_ab[2,1] -0.40  -0.52 0.48 0.49 -0.96 0.57 1.01      174      276
 Omega_ab[1,2] -0.40  -0.52 0.48 0.49 -0.96 0.57 1.01      174      276
 Omega_ab[2,2]  1.00   1.00 0.00 0.00  1.00 1.00   NA       NA       NA
Figure 11
Figure 12

6 Item-response theory (IRT) models

6.1 Item-response model

print_stan_code(irt_guessing$code())
data {
  int N;   // number of observations
  int J;   // number of students
  int K;   // number of items on exam
  array[N] int<lower=0, upper=J> student;
  array[N] int<lower=0, upper=K> item;
  array[N] int<lower=0, upper=1> y;
  real mu_mu_beta;
  real<lower=0> sigma_mu_beta, mu_sigma_alpha, mu_sigma_beta;
}
parameters {
  real mu_beta;
  real<lower=0> sigma_alpha, sigma_beta;
  vector<offset=0, multiplier=sigma_alpha>[J] alpha;
  vector<offset=mu_beta, multiplier=sigma_beta>[K] beta;
}
model {
  alpha ~ normal(0, sigma_alpha);
  beta ~ normal(mu_beta, sigma_beta);
  mu_beta ~ normal(mu_mu_beta, sigma_mu_beta);
  sigma_alpha ~ exponential(1/mu_sigma_alpha);
  sigma_beta ~ exponential(1/mu_sigma_beta);
  y ~ bernoulli(0.25 + 0.75*inv_logit(alpha[student] - beta[item]));
}
# Basic IRT (Rasch-style) model with guessing: student abilities alpha and
# item difficulties beta, success prob 0.25 + 0.75*inv_logit(alpha - beta)
fit_11 <- plot_irt(
  "Item-response model",
  irt_guessing,
  c(longdata, list(
    mu_mu_beta = 0, sigma_mu_beta = 5,
    mu_sigma_alpha = 5, mu_sigma_beta = 5
  )),
  item_id,
  guessprob = 0.25
)
Figure 13

6.2 Item-response model with discrimination parameters

print_stan_code(irt_guessing_discrimination$code())
data {
  int N;   // number of observations
  int J;   // number of students
  int K;   // number of items on exam
  array[N] int<lower=0, upper=J> student;
  array[N] int<lower=0, upper=K> item;
  array[N] int<lower=0, upper=1> y;
  real mu_mu_beta;
  real<lower=0> sigma_mu_beta, mu_sigma_alpha, mu_sigma_beta, mu_sigma_gamma;
}
parameters {
  real mu_beta;
  real<lower=0> sigma_alpha, sigma_beta, sigma_gamma;
  vector<offset=0, multiplier=sigma_alpha>[J] alpha;
  vector<offset=mu_beta, multiplier=sigma_beta>[K] beta;
  vector<offset=1, multiplier=sigma_gamma>[K] gamma;
}
model {
  alpha ~ normal(0, sigma_alpha);
  beta ~ normal(mu_beta, sigma_beta);
  gamma ~ normal(1, sigma_gamma);
  mu_beta ~ normal(mu_mu_beta, sigma_mu_beta);
  sigma_alpha ~ exponential(1/mu_sigma_alpha);
  sigma_beta ~ exponential(1/mu_sigma_beta);
  sigma_gamma ~ exponential(1/mu_sigma_gamma);
  y ~ bernoulli(0.25 + 0.75*inv_logit(gamma[item] .* (alpha[student] - beta[item])));
}
# IRT model with per-item discrimination parameters gamma.
# Note: `guessprob` is an argument of the plotting function, not a declared
# input of the Stan program, so it does not belong in the data list; the
# stray `guessprob = 0.25` entry has been removed to match the fit_13 call.
fit_12 <- plot_irt(
  "Item-response model with discrimination parameters",
  irt_guessing_discrimination,
  c(longdata, list(
    mu_mu_beta = 0, sigma_mu_beta = 5,
    mu_sigma_alpha = 5, mu_sigma_beta = 5,
    mu_sigma_gamma = 0.5
  )),
  item_id,
  guessprob = 0.25
)
Figure 14

6.3 Item-response model with discrimination parameters with init

# Same discrimination model, refit with small initial values (init = 0.1)
# to improve sampler behavior for this harder posterior
fit_13 <- plot_irt(
  "Item-response model with discrimination parameters",
  irt_guessing_discrimination,
  c(longdata, list(
    mu_mu_beta = 0, sigma_mu_beta = 5,
    mu_sigma_alpha = 5, mu_sigma_beta = 5,
    mu_sigma_gamma = 0.5
  )),
  item_id,
  guessprob = 0.25,
  init = 0.1
)
Figure 15

IRT plots

# Extract posterior draws as matrices; [, 1:J] / [, 1:K] keep only the
# parameter columns, dropping the trailing .chain/.iteration/.draw metadata
# columns of the draws_df format
alpha_sims <- as.matrix(fit_13$draws("alpha", format = "df"))[, 1:J]
beta_sims <- as.matrix(fit_13$draws("beta", format = "df"))[, 1:K]
gamma_sims <- as.matrix(fit_13$draws("gamma", format = "df"))[, 1:K]
# Robust point estimates (median) and scales (MAD) per parameter
alpha_hat <- apply(alpha_sims, 2, median)
alpha_sd <- apply(alpha_sims, 2, mad)
beta_hat <- apply(beta_sims, 2, median)
beta_sd <- apply(beta_sims, 2, mad)
gamma_hat <- apply(gamma_sims, 2, median)
gamma_sd <- apply(gamma_sims, 2, mad)
# Plot normal approximations to each posterior: student abilities as upward
# density curves, item difficulties as mirrored (negated) curves below
par(mar = c(3, 0, 0, 0), mgp = c(1.5, .2, 0), tck = -.01)
# x-range wide enough to cover +/- 3 sd of every ability and difficulty
rng <- range(
  alpha_hat - 3*alpha_sd,
  beta_hat - 3*beta_sd,
  alpha_hat + 3*alpha_sd,
  beta_hat + 3*beta_sd
)
plot(
  x = rng, 
  y = c(-1, 1),  
  xlab = "Posterior distributions for student abilities (above) and item difficulties (below)",
  ylab = "", yaxt = "n",
  bty = "n", type = "n"
)
for (j in 1:J){
  curve(dnorm(x, alpha_hat[j], alpha_sd[j]), col = "red", add = TRUE)
}
for (k in 1:K){
  # negated density flips the item-difficulty curves below the axis
  curve(-dnorm(x, beta_hat[k], beta_sd[k]), col = "red", add = TRUE)
}
Figure 16
# Scatter of item difficulty (beta) vs discrimination (gamma) with +/- 1 sd
# uncertainty crosses and item labels
par(mar = c(2.5, 2.5, .5, .5), mgp = c(1.5, .2, 0), tck = -.01)
x_rng <- range(beta_hat - beta_sd, beta_hat + beta_sd)
y_rng <- range(gamma_hat - gamma_sd, gamma_hat + gamma_sd)
plot(
  x_rng,
  y_rng,
  xlab = expression(beta[k]),
  ylab = expression(gamma[k]),
  bty = "l", type = "n"
)
for (k in 1:K) {
  # vertical bar: the `* 0` pins the x-extent to zero so only gamma varies
  lines(
    beta_hat[k] + c(-1, 1) * 0,
    gamma_hat[k] + c(-1, 1) * gamma_sd[k],
    col = "red", lwd = .5
  )
  # horizontal bar: x varies by +/- beta_sd, y is held fixed
  lines(
    beta_hat[k] + c(-1, 1) * beta_sd[k],
    gamma_hat[k] + c(-1, 1) * 0,
    col = "red", lwd = .5
  )
}
text(beta_hat, gamma_hat, item_id, col = "blue", cex = .9)
Figure 17

7 Prior predictive simulations

# Draw one (intercept, slope) pair from independent normal priors, simulate
# binary outcomes at each x under the logistic model, and plot the simulated
# data (horizontally jittered) on a fixed probability axis.
prior_predictive <- function(x, x_jitt, mu_a, sigma_a, mu_b, sigma_b) {
  # Single prior draw of the intercept and slope
  a_draw <- rnorm(1, mu_a, sigma_a)
  b_draw <- rnorm(1, mu_b, sigma_b)
  # Simulated 0/1 outcomes given this draw
  y_sim <- rbinom(length(x), 1, invlogit(a_draw + b_draw * x))
  # Empty frame spanning the data range on x and [0, 1] on y
  plot(
    range(x), c(0, 1),
    xlab = "x", ylab = "y",
    xaxt = "n", yaxt = "n",
    yaxs = "i", bty = "l", type = "n"
  )
  axis(1, seq(-2, 2, 1))
  axis(2, c(0, 1))
  # Pull points slightly toward 0.5 so they stay inside the plot frame
  points(x_jitt, 0.5 + 0.96 * (y_sim - 0.5), cex = .7, pch = 20, col = "blue")
}
# 10 prior predictive draws with tight normal(0, 0.5) priors, 2 x 5 grid
par(oma = c(0, 0, 1.5, 0), mfrow = c(2, 5), mar = c(3, 3, 1, 1), 
    mgp = c(1.3, .2, 0), tck = -.01)
for (loop in 1:10) {
  prior_predictive(score_adj, score_adj_jitt, 0, 0.5, 0, 0.5)
}
mtext("10 prior predictive simulations with a ~ normal(0, 0.5) and b ~ normal(0, 0.5)", 
      side = 3, line = .5, outer = TRUE, cex = .7)
Figure 18
# 10 prior predictive draws with the weakly informative normal(0, 5) priors
par(oma = c(0, 0, 1.5, 0), mfrow = c(2, 5), mar = c(3, 3, 1, 1), 
    mgp = c(1.3, .2, 0), tck = -.01)
for (loop in 1:10) {
  prior_predictive(score_adj, score_adj_jitt, 0, 5, 0, 5)
}
mtext("10 prior predictive simulations with a ~ normal(0, 5) and b ~ normal(0, 5)", 
      side = 3, line = .5, outer = TRUE, cex = .7)
Figure 19
# 10 prior predictive draws with very wide normal(0, 50) priors, which
# produce implausibly deterministic 0/1 patterns
par(oma = c(0, 0, 1.5, 0), mfrow = c(2, 5), mar = c(3, 3, 1, 1), 
    mgp = c(1.3, .2, 0), tck = -.01)
for (loop in 1:10) {
  prior_predictive(score_adj, score_adj_jitt, 0, 50, 0, 50)
}
mtext("10 prior predictive simulations with a ~ normal(0, 50) and b ~ normal(0, 50)", 
      side = 3, line = .5, outer = TRUE, cex = .7)
Figure 20

8 Breaking the model

print_stan_code(logit_guessing_uncentered$code())
data {
  int J;
  array[J] int<lower=0, upper=1> y;
  vector[J] x;
  real mu_a, mu_b;
  real<lower=0> sigma_a, sigma_b;
}
parameters {
  real a, b;
}
model {
  a ~ normal(mu_a, sigma_a);
  b ~ normal(mu_b, sigma_b);
  y ~ bernoulli(0.25 + 0.75*inv_logit(a + b*x));
}

Simulate data

# Simulate data from a known logistic-with-guessing model on the raw
# (unstandardized) score scale.  NOTE: this reassigns the globals J, x, y
# used earlier in the notebook.
set.seed(123)
J <- 32
x <- runif(J, 10, 20)  # simulated raw exam scores
a_ <- -6   # true intercept (raw-score scale)
b_ <- 0.4  # true slope (raw-score scale)
y <- rbinom(J, 1, 0.25 + 0.75 * invlogit(a_ + b_ * x))
m_x <- mean(x)
s_x <- sd(x)
x_adj <- (x - m_x)/s_x  # standardized version for the second fit below

# Deliberately vague normal(0, 1000) priors to "break" the uncentered model
break_data <- list(
  J = J,
  x = x,
  y = y,
  mu_a = 0,
  sigma_a = 1000,
  mu_b = 0,
  sigma_b = 1000
)
# Fit the uncentered model (raw predictor, vague priors) to the simulated data
break_1_fit <- logit_guessing_uncentered$sample(data = break_data, refresh = 0)

print(break_1_fit)

a <- extract_variable(break_1_fit, "a")
b <- extract_variable(break_1_fit, "b")
n_sims <- length(a)
par(mar = c(3, 3, 1, 1), mgp = c(1.5, .5, 0), tck = -.01)
plot(
  x, y,
  xlab = "Exam score",
  ylab = "Pr (correct answer)",
  yaxs = "i", bty = "l", type = "n"
)
# Overlay 20 posterior draws of the fitted curve.  Inside curve() the symbol
# x is curve's own plotting variable, shadowing the global x.
for (s in sample(n_sims, 20)) {
  curve(
    0.25 + 0.75 * invlogit(a[s] + b[s] * x),
    lwd = .5, col = "red", add = TRUE
  )
}
points(x, 0.5 + 0.985 * (y - 0.5), cex = .7, pch = 20)
# True curve used to simulate the data, for reference
curve(0.25 + 0.75 * invlogit(a_ + b_ * x), add = TRUE)
Figure 21
# Refit with the model that standardizes x internally (logit_guessing)
break_2_fit <- logit_guessing$sample(data = break_data, refresh = 0)

print(break_2_fit)

a <- extract_variable(break_2_fit, "a")
b <- extract_variable(break_2_fit, "b")
n_sims <- length(a)
par(mar = c(3, 3, 1, 1), mgp = c(1.5, .5, 0), tck = -.01)
plot(
  x_adj, y,
  xlim = c(-2, 2),
  xlab = "Standardized exam score",
  ylab = "Pr (correct answer)",
  yaxs = "i", bty = "l", type = "n"
)
# Posterior draws are on the standardized-x scale, matching the axis
for (s in sample(n_sims, 20)) {
  curve(
    0.25 + 0.75 * invlogit(a[s] + b[s] * x),
    lwd = .5, col = "red", add = TRUE
  )
}
points(x_adj, 0.5 + 0.985 * (y - 0.5), cex = .7, pch = 20)
# True curve mapped back from the raw to the standardized scale
curve(0.25 + 0.75 * invlogit(a_ + b_ * (m_x + s_x * x)), add = TRUE)
Figure 22
# Refit the multilevel model.  NOTE(review): this passes the real exam
# `longdata` but plots against the unstandardized score_jitt axis -- the
# apparent mismatch seems to be the point of "Breaking the model"; confirm
# this is intended rather than a leftover from the earlier fit_5 call.
plot_logit_grid_2(
  "Breaking the model",
  "Exam score",
  logit_guessing_multilevel,
  c(longdata, list(
    mu_mu_a = 0, sigma_mu_a = 5, 
    mu_mu_b = 0, sigma_mu_b = 5, 
    mu_sigma_a = 5, mu_sigma_b = 5
  )),
  score_jitt,
  item_id,
  guessprob = 0.25
)
 variable    mean  median   sd  mad      q5     q95 rhat ess_bulk ess_tail
  lp__    -382.25 -381.94 6.52 6.53 -393.31 -371.94 1.00     1041     1577
  mu_a       0.84    0.84 0.36 0.35    0.26    1.45 1.01      828     1502
  mu_b       1.09    1.08 0.18 0.17    0.81    1.41 1.00     3608     2054
  sigma_a    1.66    1.61 0.36 0.34    1.15    2.32 1.00     1297     2002
  sigma_b    0.25    0.21 0.19 0.19    0.02    0.63 1.00     1192     1535
  a[1]       0.87    0.87 0.52 0.51    0.02    1.72 1.00     7095     3182
  a[2]      -1.87   -1.78 0.76 0.73   -3.24   -0.78 1.00     4736     3130
  a[3]       1.80    1.79 0.60 0.59    0.83    2.85 1.00     7355     2447
  a[4]      -1.64   -1.58 0.67 0.63   -2.81   -0.67 1.00     5210     3512
  a[5]       0.28    0.29 0.52 0.51   -0.58    1.11 1.00     6180     2942

 # showing 10 of 53 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
Figure 23
Figure 24

References

Gelman, A., and A. Vehtari. 2024. Active Statistics: Stories, Games, Problems, and Hands-on Demonstrations for Applied Regression and Causal Inference. Cambridge University Press.

Licenses

  • Code © 2022–2025, Andrew Gelman, licensed under BSD-3.
  • Text © 2022–2025, Andrew Gelman, licensed under CC-BY-NC 4.0.