Incremental development and testing: Black cat adoptions

Author

Richard McElreath and Aki Vehtari

Published

2025-12-27

Modified

2026-06-15

This notebook includes the code for the Bayesian Workflow book Chapter 22 Incremental development and testing: Black cat adoptions.

1 Introduction

Intro text

Load packages

library(rprojroot)
root <- has_file(".Bayesian-Workflow-root")$make_fix_file()
library(scales)
library(cmdstanr)
# CmdStanR output directory makes Quarto cache to work
dir.create(root("cat_adoptions", "stan_output"), showWarnings = FALSE)
options(cmdstanr_output_dir = root("cat_adoptions", "stan_output"))
library(posterior)
library(survival)
library(rethinking)
library(dplyr)
library(readr)
library(stringr)
library(ggplot2)
library(bayesplot)
theme_set(bayesplot::theme_default(base_family = "sans", base_size = 14))
library(ggdist)
library(ggsurvfit)

Utility functions for sampling using cmdstanr and plotting

cstan <- function(f, data = list(), seed = 123, chains = 4) {
  model <- cmdstan_model(f)
  fit <- model$sample(
    data = data,
    seed = seed,
    chains = chains,
    parallel_chains = chains,
    refresh = 0
  )
  return(fit)
}
dens <- function(x, adj = 0.5, norm.comp = FALSE, main = "",
                 show.HPDI = FALSE, add = FALSE, ...) {
  thed <- density(x, adjust = adj)
  if (add == FALSE) {
    plot(thed, main = main, ...)
  } else {
    lines(thed$x, thed$y, ...)
  }
}
print_stan_file <- function(file) {
  code <- readLines(file)
  if (isTRUE(getOption("knitr.in.progress")) &
        identical(knitr::opts_current$get("results"), "asis")) {
    # In render: emit as-is so Pandoc/Quarto does syntax highlighting
    block <- paste0("```stan", "\n", paste(code, collapse = "\n"), "\n", "```")
    knitr::asis_output(block)
  } else {
    writeLines(code)
  }
}

2 Data

Cat adoptions data is available in rethinking package. We read the data from github, so there is no need to install the rethinking package.

urlfile <- "https://raw.githubusercontent.com/rmcelreath/rethinking/master/data/AustinCats.csv"
d <- read_delim(urlfile, delim = ";")
glimpse(d)

Rows: 22,356
Columns: 9
$ id            <chr> "A730601", "A679549", "A683656", "A709749", "A733551", "…
$ days_to_event <dbl> 1, 25, 4, 41, 9, 4, 4, 5, 24, 2, 34, 27, 3, 151, 106, 4,…
$ date_out      <chr> "07/08/2016 09:00:00 AM", "06/16/2014 01:54:00 PM", "07/…
$ out_event     <chr> "Transfer", "Transfer", "Adoption", "Transfer", "Transfe…
$ date_in       <chr> "07/07/2016 12:11:00 PM", "05/22/2014 03:43:00 PM", "07/…
$ in_event      <chr> "Stray", "Stray", "Stray", "Stray", "Stray", "Stray", "S…
$ breed         <chr> "Domestic Shorthair Mix", "Domestic Shorthair Mix", "Sno…
$ color         <chr> "Blue Tabby", "Black/White", "Lynx Point", "Calico", "Br…
$ intake_age    <dbl> 7, 1, 2, 12, 1, 1, 2, 24, 1, 3, 4, 12, 1, 7, 0, 12, 1, 1…

d <- d |> mutate(days = days_to_event,
                 adopted = ifelse(out_event == "Adoption", 1, 0),
                 color = ifelse(color == "Black", 1, 2))

Prepare data for Stan models

dat <- list(N = nrow(d),
            days = d$days,
            adopted = d$adopted,
            color = d$color)

Plot individual cats as lines

## command below makes the plot window with right aspect ratio
# rethinking::blank2(w=1.2)

n <- 100
idx <- sample(1:dat$N, size = n)
ymax <- max(dat$days[idx])
plot(NULL, xlim = c(0, ymax), ylim = c(1, n),
     xlab = "Days observed", ylab = "Cat")
for (i in 1:n) {
  j <- idx[i]
  cat_color <- ifelse(dat$color[j] == 1, "black", "orange")
  lines(c(0, dat$days[j]), c(i, i), lwd = 4, col = cat_color)
  if (dat$adopted[j] == 1) {
    points(dat$days[j], i, pch = 16, cex = 1.5, col = cat_color)
  }
}

ggplot version

d[idx, ] |>
  ggplot(aes(y = seq_along(days), x = days, color = factor(color))) +
  geom_segment(aes(yend = seq_along(days), xend = 0), size = 1) +
  geom_point(aes(shape = factor(adopted)), size = 3) +
  scale_shape_manual(values = c(1, 16), labels = c("Other", "Adopted")) +
  scale_color_manual(values = c("1" = "black", "2" = "orange"),
                     labels = c("Black", "Other")) +
  labs(y = "Cat", x = "Days observed", color = "Color", shape = "Event") +
  coord_cartesian(expand = c(left = FALSE)) +
  theme(legend.position = "inside",
        legend.justification.inside = c(1, 0.5))

3 Generative models

How should we model these data? Think about how they were generated. We start with process of adoption and then add observation (censoring) process.

cat_adopt <- function(day, prob) {
  if (day > 1000 ) return(day)
  if (runif(1) > prob) {
    # keep waiting...
    day <- cat_adopt(day + 1, prob)
  }
  day # adopted
}
sim_cats1 <- function(n = 10, p = c(0.1, 0.2)) {
  color <- rep(NA, n)
  days <- rep(NA, n)
  for (i in 1:n) {
    color[i] <- sample(c(1, 2), size = 1, replace = TRUE)
    days[i] <- cat_adopt(1, p[color[i]])
  }
  return(list(N = n, days = days, color = color, adopted = rep(1, n)))
}

# version using rgeom - just for comparison
#sim_cats1 <- function(n=1e3,p=c(0.1,0.2)) {
#    color <- sample(c(1,2),size=n,replace=TRUE)
#    days <- rgeom( n , p[color] ) + 1
#    return(list(N=n,days=days,color=color,adopted=rep(1,n)))
#}

Simulate using the generative model

synth_cats <- sim_cats1(1e3)

Plot empirical K-M curves

sfit <- survfit(Surv(days, adopted) ~ color, data = synth_cats)
plot(sfit, lty = 1, lwd = 3, col = c("black", "orange"),
     xlab = "Days", ylab = "Proportion un-adopted")

Plot empirical K-M curves using ggplot

synth_cats |>
  survfit2(formula = Surv(days, adopted) ~ color, data = _) |> # ggsurvfit version
  tidy_survfit() |>
  ggplot(aes(x = time, y = estimate, color = strata)) +
  geom_step(linewidth = 1) +
  xlim(c(0, 50)) +
  scale_y_continuous(limits = c(0, 1), expand = expansion(mult = c(0, 0.02))) +
  scale_color_manual(values = c("1" = "black", "2" = "orange"),
                     labels = c("Black", "Other")) +
  labs(x = "Days", y = "Proportion un-adopted", color = "Color") +
  theme(legend.position = "none")

3.1 First Stan model

cat_code1 <- root("cat_adoptions", "adoptions_observed.stan")

print_stan_file(cat_code1)

// observed adoptions only
data{
  int N;
  array[N] int adopted; // 1/0 indicator
  array[N] int days;       // days until event
  array[N] int color;   // 1=black, 2=other
}
parameters{
  vector<lower=0,upper=1>[2] p;
}
model{
  p ~ beta(1,10);
  for (i in 1:N) {
    real P = p[color[i]];
    if (adopted[i]==1) {
      target += log((1-P)^(days[i]-1) * P);
    } else {
      // something here
    }
  }
}

Prior predictive simulation

n <- 12
sim_prior <- replicate(n, rbeta(2, 1, 10))

# rethinking::blank2(w=1.2)
plot(NULL, xlab = "Days", ylab = "Proportion un-adopted",
     xlim = c(0, 50), ylim = c(0, 1))
mtext("Prior predictive distribution")
for (i in 1:n) {
  days_rep <- sim_cats1(n = 1e3, p = sim_prior[, i])
  xfit <- survfit(Surv(days, adopted) ~ color, data = days_rep)
  lines(xfit, lwd = 2, col = c("black", "orange"))
}

Prior predictive simulation with ggplot

lapply(1:n, \(i) sim_cats1(n = 1e3, p = sim_prior[, i]) |>
         as.data.frame() |> mutate(sim = i)) |>
  bind_rows() |>
  survfit2(formula = Surv(days, adopted) ~ color + sim, data = _) |> # ggsurvfit version
  tidy_survfit() |>
  mutate(color = str_split_i(strata, ", ", 1),
         sim = str_split_i(strata, ", ", 2)) |>
  ggplot(aes(x = time, y = estimate, color = color, group = interaction(color, sim))) +
  geom_step() +
  xlim(c(0, 50)) +
  scale_y_continuous(limits = c(0, 1), expand = expansion(mult = c(0, 0.02))) +
  scale_color_manual(values = c("1" = "black", "2" = "orange"),
                     labels = c("Black", "Other")) +
  labs(x = "Days", y = "Proportion un-adopted", color = "Color")

Test the first model code using simulated data

p <- c(0.1, 0.15)
sim_dat <- sim_cats1(n = 1000, p = p)

fit1s <- cstan(cat_code1, data = sim_dat)

Posterior summary

print(fit1s)

 variable     mean   median   sd  mad       q5      q95 rhat ess_bulk ess_tail
     lp__ -2977.74 -2977.41 1.04 0.72 -2979.82 -2976.77 1.00     1708     2348
     p[1]     0.11     0.11 0.00 0.00     0.10     0.11 1.00     3189     2288
     p[2]     0.16     0.16 0.01 0.01     0.15     0.17 1.00     3795     2917

Posterior with simulated data

post1s <- fit1s$draws(format = "df")
plot(density(post1s$`p[1]`), lwd = 3, xlab = "Probability of adoption",
     xlim = c(0.07, 0.2), main = "")
k <- density(post1s$`p[2]`)
lines(k$x, k$y, lwd = 3, col = "orange")
abline(v = p[1], lwd = 2)
abline(v = p[2], lwd = 2, col = "orange")

Posterior with simulated data with ggplot

post1s |>
  ggplot() +
  stat_slab(aes(x = `p[1]`), density = "unbounded", trim = FALSE, fill = NA, color = "black") +
  stat_slab(aes(x = `p[2]`), density = "unbounded", trim = FALSE, fill = NA, color = "orange") +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank(), strip.text.y = element_blank()) +
  coord_cartesian(expand = FALSE) +
  xlim(c(0.07, 0.2)) +
  labs(x = "Probability of adoption", y = "") +
  geom_vline(xintercept = p, color = c("black", "orange"))

Sample from the posterior using the real data

fit1 <- cstan(cat_code1, data = dat)

Posterior summary

print(fit1)

 variable      mean    median   sd  mad        q5       q95 rhat ess_bulk
     lp__ -52604.75 -52604.46 0.99 0.73 -52606.75 -52603.79 1.00     1973
     p[1]      0.02      0.02 0.00 0.00      0.02      0.02 1.00     2848
     p[2]      0.03      0.03 0.00 0.00      0.03      0.03 1.00     3084
 ess_tail
     2674
     2076
     2452

Kaplan-Meier posterior simulations

post1 <- fit1$draws(format = "df")
# rethinking::blank2(w=1.2)
plot(NULL, xlab = "Days", ylab = "Proportion un-adopted", xlim = c(0, 50), ylim = c(0, 1))
mtext("Posterior predictive distribution (1000 cats)")
n <- 12
for (i in 1:n) {
  days_rep <- sim_cats1(n = 1e3, p = post1[i, c("p[1]", "p[2]")])
  xfit <- survfit(Surv(days, adopted) ~ color, data = days_rep)
  lines(xfit, lwd = 2, col = alpha(c("black", "orange"), 0.5))
}

Posterior Kaplan-Meier with ggplot

lapply(1:n, \(i) sim_cats1(n = 1e3, p = post1[i, c("p[1]", "p[2]")]) |>
         as.data.frame() |> mutate(sim = i)) |>
  bind_rows() |>
  survfit2(formula = Surv(days, adopted) ~ color + sim, data = _) |> # ggsurvfit version
  tidy_survfit() |>
  mutate(color = str_split_i(strata, ", ", 1),
         sim = str_split_i(strata, ", ", 2)) |>
  ggplot(aes(x = time, y = estimate, color = color, group = interaction(color, sim))) +
  geom_step(alpha = 0.5) +
  xlim(c(0, 50)) +
  scale_y_continuous(limits = c(0, 1), expand = expansion(mult = c(0, 0.02))) +
  scale_color_manual(values = c("1" = "black", "2" = "orange"),
                     labels = c("Black", "Other")) +
  labs(x = "Days", y = "Proportion un-adopted", color = "Color") +
  theme(legend.position = "none")

3.2 Add observation (censoring) model

Simulate from the generative model

sim_cats2 <- function(n = 10, p = c(0.1, 0.2), cens = 50) {
  color <- rep(NA, n)
  days <- rep(NA, n)
  for (i in 1:n) {
    color[i] <- sample(c(1, 2), size = 1, replace = TRUE)
    days[i] <- cat_adopt(1, p[color[i]])
  }
  adopted <- ifelse(days < cens, 1, 0)
  days <- ifelse(adopted == 1, days, cens)
  return(list(N = n, days = days, color = color, adopted = adopted))
}

cat_code2 <- root("cat_adoptions", "adoptions_censored.stan")

print_stan_file(cat_code2)

// all events, including censored
data{
  int N;
  array[N] int adopted; // 1/0 indicator
  array[N] int days;    // days until event
  array[N] int color;   // 1=black, 2=other
}
parameters{
  vector<lower=0,upper=1>[2] p;
}
model{
  p ~ beta(1,10);
  for (i in 1:N) {
    real P = p[color[i]];
    if (adopted[i]==1) {
      target += log((1-P)^(days[i]-1) * P);
    } else {
      target += log((1-P)^days[i]);
    }
  }
}

Test censoring model using simulated data

sim_dat <- sim_cats2(n = 1e3, p = c(0.01, 0.02))

fit2s <- cstan(cat_code2, data = sim_dat)

Posterior summary

print(fit2s)

 variable     mean   median   sd  mad       q5      q95 rhat ess_bulk ess_tail
     lp__ -2681.93 -2681.63 1.00 0.76 -2683.97 -2680.95 1.00     2164     2351
     p[1]     0.01     0.01 0.00 0.00     0.01     0.01 1.00     3227     2605
     p[2]     0.02     0.02 0.00 0.00     0.02     0.02 1.00     3165     2493

post2s <- fit2s$draws(format = "df")
dens(post2s$`p[1]`, lwd = 3, xlab = "Probability of adoption", xlim = c(0, 0.03), ylim = c(0, 600))
dens(post2s$`p[2]`, add = TRUE, lwd = 3, col = "orange")
abline(v = 0.01, lwd = 2); abline(v = 0.02, lwd = 2, col = "orange")

Posterior with simulated data with ggplot

post2s |>
  ggplot() +
  stat_slab(aes(x = `p[1]`), density = "unbounded", trim = FALSE, fill = NA, color = "black") +
  stat_slab(aes(x = `p[2]`), density = "unbounded", trim = FALSE, fill = NA, color = "orange") +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank(), strip.text.y = element_blank()) +
  coord_cartesian(expand = FALSE) +
  xlim(c(0.005, 0.025)) +
  labs(x = "Probability of adoption", y = "") +
  geom_vline(xintercept = c(0.01, 0.02), color = c("black", "orange"))

Test previous model with new censored data

fit1s <- cstan(cat_code1, data = sim_dat)

print(fit1s)

post1s <- fit1s$draws(format = "df")
dens(post1s$`p[1]`, lwd = 3, xlab = "Probability of adoption",
     xlim = c(0, 0.06), ylim = c(0, 170))
dens(post1s$`p[2]`, add = TRUE, lwd = 3, col = "orange")
abline(v = 0.01, lwd = 2); abline(v = 0.02, lwd = 2, col = "orange")

Posterior with simulated data with ggplot

post1s |>
  ggplot() +
  stat_slab(aes(x = `p[1]`), density = "unbounded", trim = FALSE, fill = NA, color = "black") +
  stat_slab(aes(x = `p[2]`), density = "unbounded", trim = FALSE, fill = NA, color = "orange") +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank(), strip.text.y = element_blank()) +
  coord_cartesian(expand = FALSE) +
  xlim(c(0.008, 0.062)) +
  labs(x = "Probability of adoption", y = "") +
  geom_vline(xintercept = c(0.01, 0.02), color = c("black", "orange"))

Sample using real data

fit1 <- cstan(cat_code1, data = dat)
fit2 <- cstan(cat_code2, data = dat)

Kaplan-meier posterior simulations

# rethinking::blank2(w=1.2)
post1 <- fit1$draws(format = "df")
post2 <- fit2$draws(format = "df")
plot(NULL, xlab = "Days", ylab = "Proportion un-adopted", xlim = c(0, 50), ylim = c(0, 1))
mtext("Posterior predictive distribution (1000 cats)")
n <- 12
# New estimates
for (i in 1:n) {
  days_rep <- sim_cats1(n = 1e3, p = post2[i, c("p[1]", "p[2]")])
  xfit <- survfit(Surv(days, adopted) ~ color, data = days_rep)
  lines(xfit, lwd = 2, col = alpha(c("black", "orange"), 0.5))
}
# Add a few simulations from first model, to show impact of censoring
n <- 1
for (i in 1:n) {
  days_rep <- sim_cats1(n = 1e4, p = post1[i, c("p[1]", "p[2]")])
  xfit <- survfit(Surv(days, adopted) ~ color, data = days_rep)
  lines(xfit, lwd = 4, col = alpha(c("black", "orange"), 0.5))
}

n <- 12
lapply(1:n, \(i) sim_cats1(n = 1e3, p = post2[i, c("p[1]", "p[2]")]) |>
         as.data.frame() |> mutate(sim = i)) |>
  bind_rows() |>
  survfit2(formula = Surv(days, adopted) ~ color + sim, data = _) |> # ggsurvfit version
  tidy_survfit() |>
  mutate(color = str_split_i(strata, ", ", 1),
         sim = str_split_i(strata, ", ", 2)) |>
  ggplot(aes(x = time, y = estimate, color = color, group = interaction(color, sim))) +
  geom_step(alpha = 0.5) +
  xlim(c(0, 50)) +
  scale_y_continuous(limits = c(0, 1), expand = expansion(mult = c(0, 0.02))) +
  scale_color_manual(values = c("1" = "black", "2" = "orange"),
                     labels = c("Black", "Other")) +
  labs(x = "Days", y = "Proportion un-adopted", color = "Color") +
  theme(legend.position = "none")

3.3 Model that uses parameters for censored observations

cat_code3 <- root("cat_adoptions", "adoptions_imputation.stan")

print_stan_file(cat_code3)

// imputation version
data{
  int N;
  array[N] int adopted; // 1/0 indicator
  vector[N] days;    // days until event
  array[N] int color;   // 1=black, 2=other
}
parameters{
  vector<lower=0,upper=1>[2] p;
  vector<lower=days>[N] days_imputed;
}
model{
  p ~ beta(1,10);
  for (i in 1:N) {
    real P = p[color[i]];
    if (adopted[i]==1) {
      target += log((1-P)^(days[i]-1) * P);
      days_imputed[i] ~ normal(days[i],0.01);
    } else {
      target += log((1-P)^(days_imputed[i]-1) * P);
    }
  }
}

fit3s <- cstan(cat_code3, data = sim_dat)

Posterior summary

print(fit3s)

        variable     mean   median    sd   mad       q5      q95 rhat ess_bulk ess_tail
 lp__            -6407.44 -6407.24 26.15 26.25 -6450.16 -6364.95 1.00     1392     2366
 p[1]                0.01     0.01  0.00  0.00     0.01     0.01 1.00     2438     3051
 p[2]                0.02     0.02  0.00  0.00     0.02     0.02 1.00     4462     3163
 days_imputed[1]   147.08   116.79 97.12 68.92    54.24   333.35 1.00     3370     1517
 days_imputed[2]   147.14   118.11 95.62 69.95    55.59   337.24 1.00     4571     2337
 days_imputed[3]    19.01    19.01  0.01  0.01    19.00    19.02 1.00     3830     2332
 days_imputed[4]    99.65    83.98 49.92 35.04    52.54   199.77 1.00     4477     2200
 days_imputed[5]    38.01    38.01  0.01  0.01    38.00    38.02 1.00     3520     2025
 days_imputed[6]    26.01    26.01  0.01  0.01    26.00    26.02 1.00     2903     1639
 days_imputed[7]    30.01    30.01  0.01  0.01    30.00    30.02 1.00     3316     1632

 # showing 10 of 1003 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

3.4 Poisson model

Model that uses Poisson count outcomes instead of duration outcomes to handle censoring. This depends upon constant hazard function though?

cat_code4 <- root("cat_adoptions", "adoptions_poisson.stan")

print_stan_file(cat_code4)

// poisson version
data{
  int N;
  array[N] int adopted; // 1/0 indicator
  vector[N] days;    // days until event
  array[N] int color;   // 1=black, 2=other
}
parameters{
  vector<lower=0>[2] lambda;
}
model{
  lambda ~ exponential(10.0);
  adopted ~ poisson(lambda[color] .* days);
}

fit4s <- cstan(cat_code4, data = sim_dat)

Posterior summary

print(fit4s)

  variable     mean   median   sd  mad       q5      q95 rhat ess_bulk ess_tail
 lp__      -1243.52 -1243.20 1.04 0.72 -1245.61 -1242.55 1.00     1832     2210
 lambda[1]     0.01     0.01 0.00 0.00     0.01     0.01 1.00     3088     2467
 lambda[2]     0.02     0.02 0.00 0.00     0.02     0.02 1.00     3076     2477

3.5 Varying effects model

Simulate background traits that differentiate cats of same color.

sim_cats3 <- function(n = 10, p = c(0.1, 0.2), cens = 50, xsd = c(0.1, 0.2)) {
  color <- rep(NA, n)
  days <- rep(NA, n)
  for (i in 1:n) {
    color[i] <- sample(c(1, 2), size = 1, replace = TRUE)
    z <- rnorm(1, 0, xsd[color[i]])
    pp <- inv_logit(logit(p[color[i]]) + z)
    days[i] <- cat_adopt(1, pp)
  }
  adopted <- ifelse(days < cens, 1, 0)
  days <- ifelse(adopted == 1, days, cens)
  return(list(N = n, days = days, color = color, adopted = adopted))
}
sim_dat <- sim_cats3(n = 1000, p = c(0.2, 0.1), xsd = c(0.1, 0.1))

Varying effects model

cat_code5 <- root("cat_adoptions", "adoptions_varying.stan")

print_stan_file(cat_code5)

// cats vary in their adoption probabilities
data{
  int N;
  array[N] int adopted; // 1/0 indicator
  array[N] int days;    // days until event
  array[N] int color;   // 1=black, 2=other
}
parameters{
  // average adoptions
  vector<lower=0,upper=1>[2] p;
  vector<lower=0>[2] theta; // dispersion
  array[N] vector<lower=0,upper=1>[2] q; // cat specific probabilities
}
model{
  p ~ beta(1,10);
  theta ~ exponential(1);
  for (i in 1:N) {
    real P = 0;
    for (j in 1:2)
      q[i,j] ~ beta(p[j]*theta[j], (1-p[j])*theta[j]);
    P = q[i,color[i]];
    if (adopted[i]==1) {
      target += log((1-P)^(days[i]-1) * P);
    } else {
      target += log((1-P)^days[i]);
    }
  }
}

fit2s <- cstan(cat_code2, data = sim_dat)
fit5s <- cstan(cat_code5, data = sim_dat)

4 Workflow

4.1 Prior predictive distributuon

Repeatedly sample from prior, simulate observations Prior draws

n <- 100
p1 <- rbeta(n, 1, 10)
p2 <- rbeta(n, 1, 10)
sim_cats2 <- function(n = 1e3, p = c(0.01, 0.02), cens = 50) {
  color <- sample(c(1, 2), size = n, replace = TRUE)
  days <- rgeom(n, p[color]) + 1
  adopted <- ifelse(days < cens, 1, 0)
  days <- ifelse(adopted == 1, days, cens)
  return(list(N = n, days = days, color = color, adopted = adopted))
}
prior_days <- sapply(1:n, function(i) sim_cats2(1, p = c(p1[i], p2[i]))$days)
plot(prior_days, xlab = "simulated cat", ylab = "days",
     pch = ifelse(prior_days == 50, 1, 16))

4.2 Posterior predictive distribution

Sample from posterior, simulate observations. Problem with this example: need to impute censored values so we’ll simulate Kaplan-Meier curves to compare to empirical curve. Plot empirical K-M curves

sfit <- survfit(Surv(days, adopted) ~ color, data = dat)
plot(sfit, lty = 1, lwd = 0.1, col = c("black", "orange"), xlim = c(0, 90),
     xlab = "Days", ylab = "Proportion un-adopted")

Simulate and draw

# n <- 12
# for (i in 1:n) {
#   days_rep <- sim_cats2(n = 1e3, p = post2$p[i, ], cens = 200)
#   xfit <- survfit(Surv(days, adopted) ~ color, data = days_rep)
#   lines(xfit, lwd = 1, col = alpha(c("black", "orange"), 0.5))
# }

Overlay empirical curves

# lines(sfit, lwd = 5, col = c("white", "white"))
# lines(sfit, lwd = 3, col = c("black", "orange"))

1 Introduction

2 Data

3 Generative models

3.1 First Stan model

3.2 Add observation (censoring) model

3.3 Model that uses parameters for censored observations

3.4 Poisson model

3.5 Varying effects model

4 Workflow

4.1 Prior predictive distributuon

4.2 Posterior predictive distribution

Licenses