# Locate the project root via a marker file so paths work from any working dir
library(rprojroot)
root <- has_file(".Bayesian-Workflow-root")$make_fix_file()
library(cmdstanr)
# Fixed CmdStanR output directory so the Quarto cache works across renders
dir.create(root("coronavirus", "stan_output"), showWarnings = FALSE)
options(cmdstanr_output_dir = root("coronavirus", "stan_output"))
# Run the four MCMC chains in parallel
options(mc.cores = 4)
library(posterior)
library(priorsense)
library(ggplot2)
library(bayesplot)
# Common plotting theme for all figures in this notebook
theme_set(bayesplot::theme_default(base_family = "sans", base_size = 14))
library(dplyr)
library(ggh4x)
# Print a Stan program. Inside a knitr/Quarto render whose chunk has
# results = "asis", emit the code as a fenced ```stan block so Pandoc/Quarto
# applies syntax highlighting; otherwise just write the code to the console.
#
# @param file Path to a Stan file.
# @return knitr asis output (when rendering) or NULL invisibly (writeLines).
print_stan_file <- function(file) {
code <- readLines(file)
# Use `&&` (not `&`) for the scalar `if` condition: it short-circuits, so the
# knitr::opts_current call is skipped entirely when not rendering. With `&`
# both sides were always evaluated, erroring if knitr is not installed.
if (isTRUE(getOption("knitr.in.progress")) &&
identical(knitr::opts_current$get("results"), "asis")) {
# In render: emit as-is so Pandoc/Quarto does syntax highlighting
block <- paste0("```stan", "\n", paste(code, collapse = "\n"), "\n", "```")
knitr::asis_output(block)
} else {
writeLines(code)
}
}Building up to a hierarchical model: Coronavirus testing
This notebook includes the code for the Bayesian Workflow book Chapter 19 Building up to a hierarchical model: Coronavirus testing.
1 Introduction
In early April, 2020, Bendavid et al. (2020a) recruited 3330 residents of Santa Clara County, California and tested them for SARS-CoV-2 antibodies. We re-analyze the data.
Load packages
Define function to compute shortest posterior interval (SPIN; Liu, Gelman, and Zheng (2015))
# Shortest posterior interval (SPIN; Liu, Gelman, and Zheng, 2015): the
# narrowest contiguous interval containing a fraction `conf` of the draws.
# Preferred over a central interval for skewed posteriors near a boundary.
#
# @param x     Posterior draws (anything coercible with as.vector()).
# @param lower Optional hard lower bound (e.g. 0 for a probability); added as
#              an extra data point so the interval may touch the boundary.
# @param upper Optional hard upper bound, handled analogously.
# @param conf  Probability mass the interval must contain (default 0.95).
# @return Length-2 numeric vector c(low, high).
spin <- function(x, lower=NULL, upper=NULL, conf=0.95) {
x <- sort(as.vector(x))
if (!is.null(lower)) {
if (lower > min(x)) stop("lower bound is not lower than all the data")
else x <- c(lower, x)
}
if (!is.null(upper)) {
if (upper < max(x)) stop("upper bound is not higher than all the data")
else x <- c(x, upper)
}
n <- length(x)
# Number of order-statistic positions the interval must span
gap <- round(conf*n)
# Width of every contiguous window covering `gap` draws
width <- x[(gap+1):n] - x[1:(n-gap)]
# First (lowest) window attaining the minimum width
index <- min(which(width==min(width)))
x[c(index, index + gap)]
}2 Simple model fit using pooled specificity and sensitivity
code <- root("coronavirus", "santa-clara.stan")print_stan_file(code)data {
// y_* = positive counts, n_* = number of tests:
// sample = Santa Clara survey; spec = specificity calibration (known
// negatives); sens = sensitivity calibration (known positives)
int<lower = 0> y_sample, n_sample, y_spec, n_spec, y_sens, n_sens;
}
parameters {
// p = prevalence, spec = test specificity, sens = test sensitivity
real<lower=0, upper = 1> p, spec, sens;
}
transformed parameters {
// Probability that a sampled person tests positive:
// true positives plus false positives
real p_sample = p * sens + (1 - p) * (1 - spec);
}
model {
y_sample ~ binomial(n_sample, p_sample);
y_spec ~ binomial(n_spec, spec);
y_sens ~ binomial(n_sens, sens);
}
generated quantities {
// log_lik and lprior feed the priorsense power-scaling diagnostics; here the
// calibration likelihoods play the role of the "prior" for spec and sens
real log_lik = binomial_lpmf(y_sample | n_sample, p_sample);
real lprior = binomial_lpmf(y_spec | n_spec, spec) +
binomial_lpmf(y_sens | n_sens, sens);
}sc_model <- cmdstan_model(code)Compute posterior with data from Bendavid et al. (2020a), 11 April 2020
# Fit the pooled model to the 11 April 2020 data: 50/3330 positives in the
# survey; two calibration studies pooled for each of spec and sens.
fit_1 <- sc_model$sample(
data = list(
y_sample = 50,
n_sample = 3330,
y_spec = 369 + 30, # two specificity studies pooled
n_spec = 371 + 30,
y_sens = 25 + 78, # two sensitivity studies pooled
n_sens = 37 + 85
),
refresh = 0,
iter_warmup = 1e4,
iter_sampling = 1e4
)
print(fit_1, digits = 3)Check prior-likelihood sensitivity using powerscaling
# Power-scaling sensitivity diagnostic (priorsense)
powerscale_sensitivity(fit_1)Sensitivity based on cjs_dist
Prior selection: all priors
Likelihood selection: all data
variable prior likelihood diagnosis
p 0.046 0.055 -
spec 0.094 0.015 potential strong prior / weak likelihood
sens 0.109 0.001 potential strong prior / weak likelihood
p_sample 0.007 0.101 -
# Density plots showing how the marginals move under prior/likelihood
# power-scaling
powerscale_plot_dens(
fit_1,
variables = c("p", "spec", "sens"),
lower_alpha = 0.5,
help_text = FALSE
)Inference for the population prevalence
draws_1 <- fit_1$draws()
# Thin to 1000 random draws for a readable scatterplot
subset <- sample(1e4, 1e3)
x <- as.vector(draws_1[subset, , "spec"])
# Base-graphics scatter of prevalence vs specificity
y <- as.vector(draws_1[subset, , "p"])par(mar = c(3, 3, 0, 1), mgp = c(2, .7, 0), tck = -.02)
plot(x, y,
xlim = c(min(x), 1), ylim = c(0, max(y)),
xaxs = "i", yaxs = "i",
xlab = expression(paste("Specificity, ", gamma)),
ylab = expression(paste("Prevalence, ", pi)), bty = "l",
pch=20, cex=.3)ggplot version
# Same scatter with bayesplot/ggplot
fit_1$draws() |>
resample_draws(ndraws = 1e3) |>
mcmc_scatter(pars = c("spec", "p"), size = 1, alpha = 0.5) +
lims(x = c(NA, 1), y = c(0, NA)) +
coord_cartesian(expand = c(bottom = FALSE)) +
labs(x = expression(paste("Specificity, ", gamma)),
# Base-graphics histogram of the prevalence posterior
y = expression(paste("Prevalence, ", pi)))par(mar = c(3, 3, 0, 1), mgp = c(2, .7, 0), tck = -.02)
hist(y,
yaxt = "n", yaxs = "i",
xlab = expression(paste("Prevalence, ", pi)),
# ggplot version of the prevalence histogram
ylab = "", main = "")fit_1$draws() |>
mcmc_hist(pars = c("p"), breaks = seq(0, 0.03, length.out = 30)) +
theme(axis.line.y = element_blank()) +
labs(x = expression(paste("Prevalence, ", pi)))Use the shortest posterior interval, which makes more sense than a central interval because of the skewness of the posterior and the hard boundary at 0.
# 95% shortest posterior interval for prevalence (hard bounds at 0 and 1)
print(spin(draws_1[, , "p"], lower = 0, upper = 1, conf = 0.95), digits = 2)[1] 0.0011 0.0186
Compute posterior with data from Bendavid et al. (2020b), 27 April 2020
# Refit the pooled model with the updated 27 April 2020 calibration data
# (many more specificity tests)
fit_2 <- sc_model$sample(
data = list(
y_sample = 50,
n_sample = 3330,
y_spec = 3308,
n_spec = 3324,
y_sens = 130,
n_sens = 157
),
refresh = 0,
iter_warmup = 1e4,
iter_sampling = 1e4
)
print(fit_2, digits = 3)
draws_2 <- fit_2$draws()
# 95% shortest posterior interval for prevalence
print(spin(draws_2[, , "p"], lower = 0, upper = 1, conf = 0.95), digits = 2)Check prior-likelihood sensitivity using powerscaling
powerscale_sensitivity(fit_2)Sensitivity based on cjs_dist
Prior selection: all priors
Likelihood selection: all data
variable prior likelihood diagnosis
p 0.030 0.077 -
spec 0.119 0.001 potential strong prior / weak likelihood
sens 0.101 0.001 potential strong prior / weak likelihood
p_sample 0.001 0.098 -
# Power-scaling density plots for the 27 April fit
powerscale_plot_dens(
fit_2,
variables = c("p", "spec", "sens"),
lower_alpha = 0.5,
help_text = FALSE
)3 Hierarchical model for sensitivity and specificity
Hierarchical model that allows sensitivity and specificity to vary across studies.
code_hierarchical <- root("coronavirus", "santa-clara-hierarchical.stan")print_stan_file(code_hierarchical)data {
int y_sample;
int n_sample;
// J_spec / J_sens calibration studies; study 1 is the assay used in the
// survey sample
int J_spec;
int J_sens;
array[J_spec] int y_spec;
array[J_spec] int n_spec;
array[J_sens] int y_sens;
array[J_sens] int n_sens;
// Scales of the half-normal priors on the between-study sds
real logit_spec_prior_scale;
real logit_sens_prior_scale;
}
parameters {
real<lower=0, upper=1> p;
// Hierarchical means and sds on the logit scale
real mu_logit_spec;
real mu_logit_sens;
real<lower=0> sigma_logit_spec;
real<lower=0> sigma_logit_sens;
// offset/multiplier gives a non-centered parameterization for the
// per-study logit spec/sens
vector<offset=mu_logit_spec, multiplier=sigma_logit_spec>[J_spec] logit_spec;
vector<offset=mu_logit_sens, multiplier=sigma_logit_sens>[J_sens] logit_sens;
}
transformed parameters {
vector[J_spec] spec = inv_logit(logit_spec);
vector[J_sens] sens = inv_logit(logit_sens);
// Positive-test probability in the survey uses study 1's spec/sens
real p_sample = p * sens[1] + (1 - p) * (1 - spec[1]);
}
model {
y_sample ~ binomial(n_sample, p_sample);
y_spec ~ binomial(n_spec, spec);
y_sens ~ binomial(n_sens, sens);
logit_spec ~ normal(mu_logit_spec, sigma_logit_spec);
logit_sens ~ normal(mu_logit_sens, sigma_logit_sens);
sigma_logit_spec ~ normal(0, logit_spec_prior_scale);
sigma_logit_sens ~ normal(0, logit_sens_prior_scale);
mu_logit_spec ~ normal(4, 2); // weak prior on mean of dist of specificity
mu_logit_sens ~ normal(4, 2); // weak prior on mean of dist of sensitivity
}
generated quantities {
// Quantities for priorsense power-scaling.
// NOTE(review): lprior includes only the mu_logit_* priors, not the
// sigma_logit_* priors -- confirm this selection is intentional.
real log_lik = binomial_lpmf(y_sample | n_sample, p_sample);
real lprior = normal_lpdf(mu_logit_spec | 4, 2) +
normal_lpdf(mu_logit_sens | 4, 2);
}sc_model_hierarchical <- cmdstan_model(code_hierarchical)Compute posterior using data from Bendavid et al. (2020b) 27 April 2020
# 14 specificity and 4 sensitivity studies from Bendavid et al. (2020b).
# The first entry of each is 0/0 (no data), so spec[1]/sens[1] -- the assay
# used in the survey -- is informed only through the hierarchical prior.
santaclara_data <- list(
y_sample = 50,
n_sample = 3330,
J_spec = 14,
y_spec = c(0, 368, 30, 70, 1102, 300, 311, 500, 198, 99, 29, 146, 105, 50),
n_spec = c(0, 371, 30, 70, 1102, 300, 311, 500, 200, 99, 31, 150, 108, 52),
J_sens = 4,
y_sens = c(0, 78, 27, 25),
n_sens = c(0, 85, 37, 35),
logit_spec_prior_scale = 1,
logit_sens_prior_scale = 1
)
fit_3a <- sc_model_hierarchical$sample(
data = santaclara_data,
refresh = 0,
iter_warmup = 1e4,
iter_sampling = 1e4,
adapt_delta = 0.99 # to reduce divergences
)
# Focus the summary on the survey-relevant and hierarchical parameters
print_variables <- c("p", "spec[1]", "sens[1]",
"mu_logit_spec", "mu_logit_sens",
"sigma_logit_spec", "sigma_logit_sens")
print(fit_3a, variables = print_variables, digits = 3)
draws_3a <- fit_3a$draws()
# 95% shortest posterior interval for prevalence
print(spin(draws_3a[, , "p"], lower = 0, upper = 1, conf = 0.95), digits = 2)Check prior-likelihood sensitivity using powerscaling
powerscale_sensitivity(fit_3a, variable = c("p", "sens", "spec")) |>
print(n = Inf)Sensitivity based on cjs_dist
Prior selection: all priors
Likelihood selection: all data
variable prior likelihood diagnosis
p 0.102 0.010 potential strong prior / weak likelihood
sens[1] 0.061 0.001 potential strong prior / weak likelihood
sens[2] 0.013 0.001 -
sens[3] 0.014 0.001 -
sens[4] 0.013 0.001 -
spec[1] 0.016 0.022 -
spec[2] 0.005 0.003 -
spec[3] 0.023 0.008 -
spec[4] 0.019 0.003 -
spec[5] 0.020 0.002 -
spec[6] 0.020 0.005 -
spec[7] 0.021 0.003 -
spec[8] 0.020 0.002 -
spec[9] 0.007 0.002 -
spec[10] 0.023 0.003 -
spec[11] 0.002 0.002 -
spec[12] 0.002 0.002 -
spec[13] 0.002 0.002 -
spec[14] 0.003 0.002 -
# Power-scaling density plots; facetted_pos_scales sets a different x-range
# per facet column (p, spec[1], sens[1]), repeated for the two scaling rows
powerscale_plot_dens(
fit_3a,
variables = c("p", "spec[1]", "sens[1]"),
lower_alpha = 0.5,
help_text = FALSE
) +
ggh4x::facetted_pos_scales(x=rep(list(scale_x_continuous(limits=c(0, 0.35)),
scale_x_continuous(limits=c(0, 1)),
scale_x_continuous(limits=c(0.98, 1))), 2))4 Hierarchical model with stronger priors
Compute posterior using data from Bendavid et al. (2020b) 27 April 2020 and stronger priors
# Tighten the half-normal priors on the between-study sds (1 -> 0.3)
santaclara_data$logit_spec_prior_scale <- 0.3
santaclara_data$logit_sens_prior_scale <- 0.3MCMC gets sometimes stuck on minor mode, so we initialize with Pathfinder
# Pathfinder variational approximation provides initial values that avoid a
# minor posterior mode
pth_3b <- sc_model_hierarchical$pathfinder(
data = santaclara_data,
refresh = 0,
max_lbfgs_iters = 100,
num_paths = 10
)
fit_3b <- sc_model_hierarchical$sample(
data = santaclara_data,
refresh = 0,
iter_warmup = 1e4,
iter_sampling = 1e4,
adapt_delta = 0.99, # not as necessary with the stronger priors, but include just in case
init = pth_3b
)
print(fit_3b, variables = print_variables, digits = 3)
draws_3b <- fit_3b$draws()
print(spin(draws_3b[, , "p"], lower = 0, upper = 1, conf = 0.95), digits = 2)
# 95% shortest posterior intervals: fit_3a (weaker priors) ...
print(spin(draws_3a[, , "p"], lower = 0, upper = 1, conf = 0.95), digits = 2)
print(spin(draws_3a[, , "spec[1]"], lower = 0, upper = 1, conf = 0.95), digits = 2)
print(spin(draws_3a[, , "sens[1]"], lower = 0, upper = 1, conf = 0.95), digits = 2)
print(spin(draws_3a[, , "mu_logit_spec"], conf = 0.95), digits = 2)
print(spin(draws_3a[, , "mu_logit_sens"], conf = 0.95), digits = 2)
print(spin(draws_3a[, , "sigma_logit_spec"], conf = 0.95), digits = 2)
print(spin(draws_3a[, , "sigma_logit_sens"], conf = 0.95), digits = 2)
# ... versus fit_3b (stronger priors)
print(spin(draws_3b[, , "p"], lower = 0, upper = 1, conf = 0.95), digits = 2)
print(spin(draws_3b[, , "spec[1]"], lower = 0, upper = 1, conf = 0.95), digits = 2)
print(spin(draws_3b[, , "sens[1]"], lower = 0, upper = 1, conf = 0.95), digits = 2)
print(spin(draws_3b[, , "mu_logit_spec"], conf = 0.95), digits = 2)
print(spin(draws_3b[, , "mu_logit_sens"], conf = 0.95), digits = 2)
print(spin(draws_3b[, , "sigma_logit_spec"], conf = 0.95), digits = 2)
print(spin(draws_3b[, , "sigma_logit_sens"], conf = 0.95), digits = 2)5 MRP model
MRP model allowing prevalence to vary by sex, ethnicity, age category, and zip code. Model is set up to use the ethnicity, age, and zip categories of Bendavid et al. (2020b)
code_model_hierarchical_mrp <- root("coronavirus", "santa-clara-hierarchical-mrp.stan")print_stan_file(code_model_hierarchical_mrp)data {
int<lower = 0> N; // number of tests in the sample (3330 for Santa Clara)
array[N] int<lower = 0, upper = 1> y; // 1 if positive, 0 if negative
vector<lower = 0, upper = 1>[N] male; // 0 if female, 1 if male
array[N] int<lower = 1, upper = 4> eth; // 1=white, 2=asian, 3=hispanic, 4=other
array[N] int<lower = 1, upper = 4> age; // 1=0-4, 2=5-18, 3=19-64, 4=65+
int<lower = 0> N_zip; // number of zip codes (58 in this case)
array[N] int<lower = 1, upper = N_zip> zip; // zip codes 1 through 58
vector[N_zip] x_zip; // predictors at the zip code level
// Calibration studies for specificity and sensitivity (as before,
// study 1 is the assay used in the survey)
int<lower = 0> J_spec;
array[J_spec] int<lower = 0> y_spec;
array[J_spec] int<lower = 0> n_spec;
int<lower = 0> J_sens;
array[J_sens] int<lower = 0> y_sens;
array[J_sens] int<lower = 0> n_sens;
int<lower = 0> J; // number of population cells, J = 2*4*4*58
vector<lower = 0>[J] N_pop; // population sizes for poststratification
real<lower = 0> coef_prior_scale;
real<lower = 0> logit_spec_prior_scale;
real<lower = 0> logit_sens_prior_scale;
}
parameters {
real mu_logit_spec;
real mu_logit_sens;
real<lower = 0> sigma_logit_spec;
real<lower = 0> sigma_logit_sens;
// Non-centered parameterization via offset/multiplier
vector<offset = mu_logit_spec, multiplier = sigma_logit_spec>[J_spec] logit_spec;
vector<offset = mu_logit_sens, multiplier = sigma_logit_sens>[J_sens] logit_sens;
vector[3] b; // intercept, coef for male, and coef for x_zip
real<lower = 0> sigma_eth;
real<lower = 0> sigma_age;
real<lower = 0> sigma_zip;
vector<multiplier = sigma_eth>[4] a_eth; // varying intercepts for ethnicity
vector<multiplier = sigma_age>[4] a_age; // varying intercepts for age category
vector<multiplier = sigma_zip>[N_zip] a_zip; // varying intercepts for zip code
}
transformed parameters {
vector[J_spec] spec = inv_logit(logit_spec);
vector[J_sens] sens = inv_logit(logit_sens);
}
model {
// Individual-level true prevalence on the probability scale
vector[N] p = inv_logit(b[1]
+ b[2] * male
+ b[3] * x_zip[zip]
+ a_eth[eth]
+ a_age[age]
+ a_zip[zip]);
// Observed positive-test probability mixes true and false positives
vector[N] p_sample = p * sens[1] + (1 - p) * (1 - spec[1]);
y ~ bernoulli(p_sample);
y_spec ~ binomial(n_spec, spec);
y_sens ~ binomial(n_sens, sens);
logit_spec ~ normal(mu_logit_spec, sigma_logit_spec);
logit_sens ~ normal(mu_logit_sens, sigma_logit_sens);
sigma_logit_spec ~ normal(0, logit_spec_prior_scale);
sigma_logit_sens ~ normal(0, logit_sens_prior_scale);
mu_logit_spec ~ normal(4, 2); // weak prior on mean of distribution of spec
mu_logit_sens ~ normal(4, 2); // weak prior on mean of distribution of sens
a_eth ~ normal(0, sigma_eth);
a_age ~ normal(0, sigma_age);
a_zip ~ normal(0, sigma_zip);
// prior on centered intercept
// (a linear combination of b, so no Jacobian adjustment is needed)
b[1] + b[2] * mean(male) + b[3] * mean(x_zip[zip]) ~ logistic(0, 1);
b[2] ~ normal(0, coef_prior_scale);
b[3] ~ normal(0, coef_prior_scale / sd(x_zip[zip])); // prior on scaled coef
sigma_eth ~ normal(0, coef_prior_scale);
sigma_age ~ normal(0, coef_prior_scale);
sigma_zip ~ normal(0, coef_prior_scale);
}
generated quantities {
real p_avg;
vector[J] p_pop; // population prevalence in the J poststratification cells
int count;
count = 1;
// Cell order (male fastest, zip slowest) must match the order in which
// N_pop is constructed on the R side
for (i_zip in 1:N_zip) {
for (i_age in 1:4) {
for (i_eth in 1:4) {
for (i_male in 0:1) {
p_pop[count] = inv_logit(b[1]
+ b[2] * i_male
+ b[3] * x_zip[i_zip]
+ a_eth[i_eth]
+ a_age[i_age]
+ a_zip[i_zip]);
count += 1;
}
}
}
}
// Population-weighted average prevalence (the MRP estimate)
p_avg = sum(N_pop .* p_pop) / sum(N_pop);
}sc_model_hierarchical_mrp <- cmdstan_model(code_model_hierarchical_mrp)To fit the model, we need individual-level data.
These data are not publicly available, so just to get the program running, we take the existing 50 positive tests and assign them at random to the 3330 people.
# Fake individual-level outcomes: scatter the 50 positives at random among
# the 3330 tested people
N <- 3330
y <- sample(rep(c(0, 1), c(3330 - 50, 50)))
n <- rep(1, 3330)Here are the counts of each sex, ethnicity, and age from Bendavid et al. (2020b).
We don’t have the zip code distribution, but we looked it up and there are 58 zip codes in Santa Clara County; for simplicity we assume all zip codes are equally likely.
We then assign these traits to people at random.
This is wrong — actually, these variables are correlated in various ways — but, again, now we have fake data we can use to fit the model.
# Assign demographic traits at random, matching the marginal counts reported
# by Bendavid et al. (2020b)
male <- sample(rep(c(0, 1), c(2101, 1229)))
eth <- sample(rep(1:4, c(2118, 623, 266, 306 + 17)))
age <- sample(rep(1:4, c(71, 550, 2542, 167)))
N_zip <- 58
zip <- sample(1:N_zip, 3330, replace = TRUE)Setting up the zip code level predictor.
In this case we will use a random number with mean 50 and standard deviation 20.
These are arbitrary numbers that we chose just to be able to test the centering and scaling in the model.
In real life we might use %Latino or average income in the zip code
x_zip <- rnorm(N_zip, 50, 20)Setting up the poststratification table.
For simplicity we assume there are 1000 people in each cell in the county.
Actually we’d want data from the Census.
J <- 2 * 4 * 4 * N_zip
N_pop <- rep(NA, J)
count <- 1
# Loop order (male fastest, zip slowest) mirrors the generated-quantities
# loop in the Stan program, so N_pop lines up with p_pop
for (i_zip in 1:N_zip){
for (i_age in 1:4){
for (i_eth in 1:4){
for (i_male in 0:1){
N_pop[count] <- 1000
count = count + 1
}
}
}
}Put together the data and fit the model
# MRP data: fake individual-level survey data plus the 27 April calibration
# studies and the poststratification table
santaclara_mrp_data <- list(
N = N,
y = y,
male = male,
eth = eth,
age = age,
zip = zip,
N_zip = N_zip,
x_zip = x_zip,
J_spec = 14,
y_spec = c(0, 368, 30, 70, 1102, 300, 311, 500, 198, 99, 29, 146, 105, 50),
n_spec = c(0, 371, 30, 70, 1102, 300, 311, 500, 200, 99, 31, 150, 108, 52),
J_sens = 4,
y_sens = c(0, 78, 27, 25),
n_sens = c(0, 85, 37, 35),
logit_spec_prior_scale = 0.3,
logit_sens_prior_scale = 0.3,
coef_prior_scale = 0.5,
J = J,
N_pop = N_pop
)
fit_4 <- sc_model_hierarchical_mrp$sample(data = santaclara_mrp_data)Show inferences for some model parameters. In addition to p_avg, the population prevalence, we also look at the inferences for the first three poststratification cells just to check that everything makes sense
print_variables <- c("p_avg", "b", "a_age", "a_eth",
"sigma_eth", "sigma_age", "sigma_zip",
"mu_logit_spec", "sigma_logit_spec",
"mu_logit_sens", "sigma_logit_sens",
"p_pop[1]", "p_pop[2]", "p_pop[3]")
print(fit_4, variables = print_variables, max_rows = 100) variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
p_avg 0.01 0.01 0.01 0.01 0.00 0.02 1.00 1662 1355
b[1] -3.97 -3.75 1.24 0.90 -6.42 -2.43 1.00 1360 775
b[2] -0.25 -0.24 0.38 0.37 -0.88 0.38 1.00 4879 2704
b[3] -0.02 -0.02 0.02 0.02 -0.05 0.01 1.00 1969 1323
a_age[1] -0.02 0.00 0.39 0.20 -0.67 0.53 1.00 4962 3082
a_age[2] -0.01 0.00 0.36 0.20 -0.59 0.54 1.00 4940 3325
a_age[3] -0.11 -0.05 0.35 0.19 -0.75 0.34 1.00 4728 3368
a_age[4] -0.01 0.00 0.38 0.21 -0.62 0.56 1.00 4961 3140
a_eth[1] -0.05 -0.01 0.35 0.20 -0.67 0.45 1.00 4570 3391
a_eth[2] -0.18 -0.07 0.40 0.23 -0.94 0.31 1.00 3177 3517
a_eth[3] 0.15 0.06 0.40 0.23 -0.37 0.92 1.00 4033 3649
a_eth[4] -0.10 -0.03 0.39 0.21 -0.79 0.42 1.00 4836 3300
sigma_eth 0.35 0.29 0.27 0.27 0.02 0.85 1.00 2633 1722
sigma_age 0.32 0.27 0.26 0.23 0.03 0.86 1.00 3230 2308
sigma_zip 0.39 0.35 0.27 0.28 0.03 0.89 1.00 1947 1937
mu_logit_spec 5.19 5.19 0.33 0.33 4.65 5.75 1.00 2828 2891
sigma_logit_spec 0.72 0.72 0.21 0.20 0.37 1.07 1.00 2219 1724
mu_logit_sens 1.55 1.55 0.33 0.31 1.01 2.09 1.00 2877 2795
sigma_logit_sens 0.40 0.39 0.20 0.19 0.08 0.73 1.00 1741 1036
p_pop[1] 0.01 0.01 0.01 0.01 0.00 0.03 1.00 2001 1700
p_pop[2] 0.01 0.01 0.01 0.01 0.00 0.02 1.00 2198 1923
p_pop[3] 0.01 0.01 0.01 0.01 0.00 0.03 1.00 2013 1842
draws_4 <- fit_4$draws()
# 95% shortest posterior interval for the population-average prevalence
print(spin(draws_4[, , "p_avg"], lower = 0, upper = 1, conf = 0.95), digits = 2)[1] 0.00058 0.02245
6 Additional prior sensitivity analysis
# Sensitivity calibration studies: positives observed among known-positive
# tests, with the empirical sensitivity per study
pos_tests <- c(78, 27, 25)
tests <- c(85, 37, 35)
sens_df <- data.frame(
  pos_tests = pos_tests,
  tests = tests,
  sample_sens = pos_tests / tests
)
# Specificity calibration studies: negatives observed among known-negative
# tests, with the empirical specificity per study
neg_tests <- c(368, 30, 70, 1102, 300, 311, 500, 198, 99, 29, 146, 105, 50)
tests <- c(371, 30, 70, 1102, 300, 311, 500, 200, 99, 31, 150, 108, 52)
spec_df <- data.frame(
  neg_tests = neg_tests,
  tests = tests,
  sample_spec = neg_tests / tests
)
# The unknown-prevalence sample: the Santa Clara survey itself
pos_tests <- 50
tests <- 3330
unk_df <- data.frame(pos_tests, tests, sample_prev = pos_tests / tests)Stan model for prior sensitivity analysis
code_sens <- root("coronavirus", "prior-sensitivity.stan")print_stan_file(code_sens)data {
// pos = sensitivity studies (known positives), neg = specificity studies
// (known negatives), unk = samples with unknown prevalence;
// N_* = tests performed, n_* = positive (or negative) results
int<lower = 0> K_pos;
array[K_pos] int<lower = 0> N_pos;
array[K_pos] int<lower = 0> n_pos;
int<lower = 0> K_neg;
array[K_neg] int<lower = 0> N_neg;
array[K_neg] int<lower = 0> n_neg;
int<lower = 0> K_unk;
array[K_unk] int<lower = 0> N_unk;
array[K_unk] int<lower = 0> n_unk;
// Hyperprior scales being varied in the sensitivity analysis
real<lower = 0> sigma_sigma_logit_sens;
real<lower = 0> sigma_sigma_logit_spec;
}
parameters {
real mu_logit_sens;
real mu_logit_spec;
real<lower = 0> sigma_logit_sens;
real<lower = 0> sigma_logit_spec;
// Non-centered per-study logit sens/spec, plus separate draws for the
// assay(s) used in the unknown-prevalence samples
vector<offset = mu_logit_sens,
multiplier = sigma_logit_sens>[K_pos] logit_sens;
vector<offset = mu_logit_spec,
multiplier = sigma_logit_spec>[K_neg] logit_spec;
vector<offset = mu_logit_sens,
multiplier = sigma_logit_sens>[K_unk] logit_sens_unk;
vector<offset = mu_logit_spec,
multiplier = sigma_logit_spec>[K_unk] logit_spec_unk;
real<lower = 0, upper = 1> pi;
}
transformed parameters {
vector[K_pos] sens = inv_logit(logit_sens);
vector[K_neg] spec = inv_logit(logit_spec);
vector[K_unk] sens_unk = inv_logit(logit_sens_unk);
vector[K_unk] spec_unk = inv_logit(logit_spec_unk);
}
model {
mu_logit_sens ~ normal(4, 2);
sigma_logit_sens ~ normal(0, sigma_sigma_logit_sens);
logit_sens ~ normal(mu_logit_sens, sigma_logit_sens);
n_pos ~ binomial_logit(N_pos, logit_sens);
mu_logit_spec ~ normal(4, 2);
sigma_logit_spec ~ normal(0, sigma_sigma_logit_spec);
logit_spec ~ normal(mu_logit_spec, sigma_logit_spec);
n_neg ~ binomial_logit(N_neg, logit_spec);
pi ~ uniform(0, 1);
logit_sens_unk ~ normal(mu_logit_sens, sigma_logit_sens);
logit_spec_unk ~ normal(mu_logit_spec, sigma_logit_spec);
// Observed positive rate mixes true and false positives
n_unk ~ binomial(N_unk, pi * sens_unk + (1 - pi) * (1 - spec_unk));
}model_sens <- cmdstan_model(code_sens)Data
# Stan data list; array() keeps length-1 inputs as arrays rather than scalars
data <- list(
K_pos = nrow(sens_df),
N_pos = array(sens_df$tests),
n_pos = array(sens_df$pos_tests),
K_neg = nrow(spec_df),
N_neg = array(spec_df$tests),
n_neg = array(spec_df$neg_tests),
K_unk = nrow(unk_df),
N_unk = array(unk_df$tests),
n_unk = array(unk_df$pos_tests)
)Empty data frame to store posterior intervals with different priors
# One row per (hyperprior setting) will be appended in the loop below
ribbon_df <- data.frame(
sigma_sens = c(),
sigma_spec = c(),
prev05 = c(),
prev50 = c(),
prev95 = c()
)Loop over different prior parameter values
# Grid of hyperprior scales: 5 sensitivity x 11 specificity settings
sigma_senss <- c(0.01, 0.25, 0.5, 0.75, 1)
sigma_specss <- c(0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
for (sigma_sens in sigma_senss) {
for (sigma_spec in sigma_specss) {
# Refit the model with this pair of hyperprior scales
data2 <- append(data, list(sigma_sigma_logit_sens = sigma_sens,
sigma_sigma_logit_spec = sigma_spec))
fit <- model_sens$sample(
data = data2,
iter_warmup = 5e4,
iter_sampling = 5e4,
seed = 1234,
refresh = 0,
adapt_delta = 0.95,
show_messages = FALSE,
show_exceptions = FALSE
)
# Store the 5%/50%/95% posterior quantiles of prevalence for this setting
pis <- fit$draws()[, , "pi"]
ribbon_df <- rbind(ribbon_df,
data.frame(sigma_sens = paste("sensitivity hyperprior", "=", sigma_sens),
sigma_spec = sigma_spec,
prev05 = quantile(pis, 0.05),
prev50 = quantile(pis, 0.5),
# Ribbon plot: prevalence intervals vs specificity hyperprior scale,
# one panel per sensitivity hyperprior scale, log-scale y axis
prev95 = quantile(pis, 0.95)))
}
}ggplot(ribbon_df, aes(x = sigma_spec)) +
facet_wrap(~ sigma_sens, nrow = 1) +
geom_ribbon(aes(ymin = prev05, ymax = prev95), fill = "gray95") +
geom_line(aes(y = prev50), linewidth = 0.5) +
geom_line(aes(y = prev05), color = "darkgray", linewidth = 0.25) +
geom_line(aes(y = prev95), color = "darkgray", linewidth = 0.25) +
scale_y_log10(limits = c(0.0009, 1.1), breaks = c(0.001, 0.01, 0.1, 1)) +
scale_x_continuous(expand = c(0, 0), lim = c(0, 1),
breaks = c(0.1, 0.3, 0.5, 0.7, 0.9)) +
ylab("prevalence") +
xlab("specificity hyperprior") +
theme_bw() +
theme(panel.spacing = unit(0.25, "lines"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank())References
Licenses
- Code © 2022–2025, Andrew Gelman and Aki Vehtari, licensed under BSD-3.
- Text © 2022–2025, Andrew Gelman and Aki Vehtari, licensed under CC-BY-NC 4.0.