Posterior predictive checking demo

Checking the assumption of independence in binomial trials (BDA3 p. 147)

ggplot2 is used for plotting, tidyr for manipulating data frames

library(ggplot2)
theme_set(theme_minimal())
library(tidyr)
library(latex2exp)

Data

y <- c(1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)

Compute test statistic for the data. Test statistic is the number of switches from 0 to 1 or from 1 to 0.

Ty <- sum(diff(y) != 0) + 0.0

Sufficient statistics

n <- length(y)
s <- sum(y)

Compute test statistic for the replicate data.

rb <- function(s, n) {
  p <- rbeta(1, s+1, n-s+1)
  yr <- rbinom(n, 1, p)
  sum(diff(yr) != 0) + 0.0
}
Tyr <- data.frame(x = replicate(10000, rb(s, n)))

Compute posterior predictive p-value

mean(Tyr<=Ty)
## [1] 0.0287

Plot test statistics for the data and replicates. Vertical line corresponds to the original data, and the histogram to the replicate data.

title1 <- 'Binomial example - number of changes?
Pr(T(yrep,theta) <= T(y,theta)|y) = 0.03'
ggplot(data = Tyr) +
  geom_histogram(aes(x = x), fill = 'steelblue',
                 color = 'black', binwidth = 1) +
  geom_vline(aes(xintercept = x), data = data.frame(x = Ty),
             color = 'red') +
  labs(x = TeX('Number of changes in \\mathit{y} and \\mathit{y}^{\\mathrm{rep}}'),
       y = '', title = title1) +
  scale_y_continuous(breaks=NULL)

LS0tCnRpdGxlOiAiQmF5ZXNpYW4gZGF0YSBhbmFseXNpcyBkZW1vIDYuMiIKYXV0aG9yOiAiQWtpIFZlaHRhcmksIE1hcmt1cyBQYWFzaW5pZW1pIgpkYXRlOiAiYHIgZm9ybWF0KFN5cy5EYXRlKCkpYCIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICB0aGVtZTogcmVhZGFibGUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCiMjIFBvc3RlcmlvciBwcmVkaWN0aXZlIGNoZWNraW5nIGRlbW8KCkNoZWNraW5nIHRoZSBhc3N1bXB0aW9uIG9mIGluZGVwZW5kZW5jZSBpbiBiaW5vbWlhbCB0cmlhbHMgIChCREEzIHAuIDE0NykKCmdncGxvdDIgaXMgdXNlZCBmb3IgcGxvdHRpbmcsIHRpZHlyIGZvciBtYW5pcHVsYXRpbmcgZGF0YSBmcmFtZXMKCmBgYHtyIHNldHVwLCBtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeShnZ3Bsb3QyKQp0aGVtZV9zZXQodGhlbWVfbWluaW1hbCgpKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KGxhdGV4MmV4cCkKYGBgCgpEYXRhCgpgYGB7ciB9CnkgPC0gYygxLCAxLCAwLCAwLCAwLCAwLCAwLCAxLCAxLCAxLCAxLCAxLCAwLCAwLCAwLCAwLCAwLCAwLCAwLCAwKQpgYGAKCkNvbXB1dGUgdGVzdCBzdGF0aXN0aWMgZm9yIHRoZSBkYXRhLgpUZXN0IHN0YXRpc3RpYyBpcyB0aGUgbnVtYmVyIG9mIHN3aXRjaGVzIGZyb20gMCB0byAxIG9yIGZyb20gMSB0byAwLgoKYGBge3IgfQpUeSA8LSBzdW0oZGlmZih5KSAhPSAwKSArIDAuMApgYGAKClN1ZmZpY2llbnQgc3RhdGlzdGljcwoKYGBge3IgfQpuIDwtIGxlbmd0aCh5KQpzIDwtIHN1bSh5KQpgYGAKCkNvbXB1dGUgdGVzdCBzdGF0aXN0aWMgZm9yIHRoZSByZXBsaWNhdGUgZGF0YS4KCmBgYHtyIH0KcmIgPC0gZnVuY3Rpb24ocywgbikgewogIHAgPC0gcmJldGEoMSwgcysxLCBuLXMrMSkKICB5ciA8LSByYmlub20obiwgMSwgcCkKICBzdW0oZGlmZih5cikgIT0gMCkgKyAwLjAKfQpUeXIgPC0gZGF0YS5mcmFtZSh4ID0gcmVwbGljYXRlKDEwMDAwLCByYihzLCBuKSkpCmBgYAoKQ29tcHV0ZSBwb3N0ZXJpb3IgcHJlZGljdGl2ZSBwLXZhbHVlCgpgYGB7ciB9Cm1lYW4oVHlyPD1UeSkKYGBgCgpQbG90IHRlc3Qgc3RhdGlzdGljcyBmb3IgdGhlIGRhdGEgYW5kIHJlcGxpY2F0ZXMuClZlcnRpY2FsIGxpbmUgY29ycmVzcG9uZHMgdG8gdGhlIG9yaWdpbmFsIGRhdGEsIGFuZAp0aGUgaGlzdG9ncmFtIHRvIHRoZSByZXBsaWNhdGUgZGF0YS4KCmBgYHtyIH0KdGl0bGUxIDwtICdCaW5vbWlhbCBleGFtcGxlIC0gbnVtYmVyIG9mIGNoYW5nZXM/ClByKFQoeXJlcCx0aGV0YSkgPD0gVCh5LHRoZXRhKXx5KSA9IDAuMDMnCmdncGxvdChkYXRhID0gVHlyKSArCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHggPSB4KSwgZmlsbCA9ICdzdGVlbGJsdWUnLAogICAgICAgICAgICAgICAgIGNvbG9yID0gJ2JsYWNrJywgYmlud2lkdGggPSAxKSArCiAgZ2VvbV92bGluZShhZXMoeGludGVyY2VwdCA9IHgpLCBkYXRhID0gZGF0YS5mcmFtZSh4ID0gVHkpLAogICAgICAgICAgICAgY29sb3IgPSAncmVkJykgKwogIGxhYnMoeCA9IFRlWCgnTnVtYmVyIG9mIGNoYW5nZXMgaW4gXFxtYXRoaXR7eX0gYW5kIFxcbWF0aGl0e3l9XntcXG1hdGhybXtyZXB9fScpLAogICAgICAgeSA9ICcnLCB0aXRsZSA9IHRpdGxlMSkgKwogIHNjYWxlX3lfY29udGludW91cyhicmVha3M9TlVMTCkKYGBgCgo=