Marginal posterior predictive checking

Tail area probabilities of marginal predictive distributions, aka probability integral transformation (PIT).

Normal model for light speed data.

ggplot2 is used for plotting, tidyr for manipulating data frames

library(ggplot2)
library(bayesplot)
theme_set(bayesplot::theme_default(base_family = "sans"))
library(tidyr)
library(latex2exp)
library(rprojroot)
root<-has_file(".BDA_R_demos_root")$make_fix_file()

Data

y <- read.table(root("demos_ch6","light.txt"))$V1

Sufficient statistics

n <- length(y)
s <- sd(y)
my <- mean(y)

Tail area probabilities of marginal predictive distributions, aka probability integral transformation (PIT)

Ty <- data.frame(x = pt((y - my)/(sqrt(1+1/n)*s), n-1))

#* Plot histogram of PIT values. Ideally histogram should be close to uniform.
title1 <- 'Light speed example
distribution of predictive distribution tail-values'

ggplot(data = Ty) +
  geom_histogram(aes(x = x), fill = 'steelblue',
                 color = 'black', binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(x = TeX(r"($\textit{p}(\textit{y}^{\textrm{rep}}_{\textit{i}} < \textit{y_i} | \textit{y})$)"),
       y = '', title = title1) +
  scale_y_continuous(breaks=NULL)

#* Plot ECDF of PIT values. Ideally ECDF should be close to diagonal line
ggplot(data=data.frame(x=Ty), aes(x)) +
  stat_ecdf(geom = "step", color=4) +
  xlim(c(0,1))+
  labs(x="Observed PIT values", y="ECDF")+
  annotate(geom="segment",x=0,y=0,xend=1,yend=1)

Repeat the PIT checking after removing two “outliers”

y <- y[y>0]

Sufficient statistics

n <- length(y)
s <- sd(y)
my <- mean(y)

Tail area probabilities of marginal predictive distributions, aka probability integral transformation (PIT)

Ty <- data.frame(x = pt((y - my)/(sqrt(1+1/n)*s), n-1))

Plot histogram of PIT values. Ideally histogram should be close to uniform.

title1 <- 'Light speed example
distribution of predictive distribution tail-values'
ggplot(data = Ty) +
  geom_histogram(aes(x = x), fill = 'steelblue',
                 color = 'black', binwidth = 0.05) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(x = TeX(r"($\textit{p}(\textit{y}^{\textrm{rep}}_{\textit{i}} < \textit{y_i} | \textit{y})$)"),
       y = '', title = title1) +
  scale_y_continuous(breaks=NULL)

#* Plot ECDF of PIT values. Ideally ECDF should be close to diagonal line
ggplot(data=data.frame(x=Ty), aes(x)) +
  stat_ecdf(geom = "step", color=4) +
  xlim(c(0,1))+
  labs(x="Observed PIT values", y="ECDF")+
  annotate(geom="segment",x=0,y=0,xend=1,yend=1)

LS0tCnRpdGxlOiAiQmF5ZXNpYW4gZGF0YSBhbmFseXNpcyBkZW1vIDYuNCIKYXV0aG9yOiAiQWtpIFZlaHRhcmksIE1hcmt1cyBQYWFzaW5pZW1pIgpkYXRlOiAiYHIgZm9ybWF0KFN5cy5EYXRlKCkpYCIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICB0aGVtZTogcmVhZGFibGUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKLS0tCiMjIE1hcmdpbmFsIHBvc3RlcmlvciBwcmVkaWN0aXZlIGNoZWNraW5nCgpUYWlsIGFyZWEgcHJvYmFiaWxpdGllcyBvZiBtYXJnaW5hbCBwcmVkaWN0aXZlIGRpc3RyaWJ1dGlvbnMsCmFrYSBwcm9iYWJpbGl0eSBpbnRlZ3JhbCB0cmFuc2Zvcm1hdGlvbiAoUElUKS4KCk5vcm1hbCBtb2RlbCBmb3IgbGlnaHQgc3BlZWQgZGF0YS4KCmdncGxvdDIgaXMgdXNlZCBmb3IgcGxvdHRpbmcsIHRpZHlyIGZvciBtYW5pcHVsYXRpbmcgZGF0YSBmcmFtZXMKCmBgYHtyIHNldHVwLCBtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGJheWVzcGxvdCkKdGhlbWVfc2V0KGJheWVzcGxvdDo6dGhlbWVfZGVmYXVsdChiYXNlX2ZhbWlseSA9ICJzYW5zIikpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkobGF0ZXgyZXhwKQpsaWJyYXJ5KHJwcm9qcm9vdCkKcm9vdDwtaGFzX2ZpbGUoIi5CREFfUl9kZW1vc19yb290IikkbWFrZV9maXhfZmlsZSgpCmBgYAoKRGF0YQoKYGBge3IgfQp5IDwtIHJlYWQudGFibGUocm9vdCgiZGVtb3NfY2g2IiwibGlnaHQudHh0IikpJFYxCmBgYAoKU3VmZmljaWVudCBzdGF0aXN0aWNzCgpgYGB7ciB9Cm4gPC0gbGVuZ3RoKHkpCnMgPC0gc2QoeSkKbXkgPC0gbWVhbih5KQpgYGAKClRhaWwgYXJlYSBwcm9iYWJpbGl0aWVzIG9mIG1hcmdpbmFsIHByZWRpY3RpdmUgZGlzdHJpYnV0aW9ucywKYWthIHByb2JhYmlsaXR5IGludGVncmFsIHRyYW5zZm9ybWF0aW9uIChQSVQpCgpgYGB7ciB9ClR5IDwtIGRhdGEuZnJhbWUoeCA9IHB0KCh5IC0gbXkpLyhzcXJ0KDErMS9uKSpzKSwgbi0xKSkKCiMqIFBsb3QgaGlzdG9ncmFtIG9mIFBJVCB2YWx1ZXMuIElkZWFsbHkgaGlzdG9ncmFtIHNob3VsZCBiZSBjbG9zZSB0byB1bmlmb3JtLgp0aXRsZTEgPC0gJ0xpZ2h0IHNwZWVkIGV4YW1wbGUKZGlzdHJpYnV0aW9uIG9mIHByZWRpY3RpdmUgZGlzdHJpYnV0aW9uIHRhaWwtdmFsdWVzJwoKZ2dwbG90KGRhdGEgPSBUeSkgKwogIGdlb21faGlzdG9ncmFtKGFlcyh4ID0geCksIGZpbGwgPSAnc3RlZWxibHVlJywKICAgICAgICAgICAgICAgICBjb2xvciA9ICdibGFjaycsIGJpbndpZHRoID0gMC4wNSkgKwogIGNvb3JkX2NhcnRlc2lhbih4bGltID0gYygwLCAxKSkgKwogIGxhYnMoeCA9IFRlWChyIigkXHRleHRpdHtwfShcdGV4dGl0e3l9XntcdGV4dHJte3JlcH19X3tcdGV4dGl0e2l9fSA8IFx0ZXh0aXR7eV9pfSB8IFx0ZXh0aXR7eX0pJCkiKSwKICAgICAgIHkgPSAnJywgdGl0bGUgPSB0aXRsZTEpICsKICBzY2FsZV95X2NvbnRpbnVvdXMoYnJlYWtzPU5VTEwpCgojKiBQbG90IEVDREYgb2YgUElUIHZhbHVlcy4gSWRlYWxseSBFQ0RGIHNob3VsZCBiZSBjbG9zZSB0byBkaWFnb25hbCBsaW5lCmdncGxvdChkYXRhPWRhdGEuZnJhbWUoeD1UeSksIGFlcyh4KSkgKwogIHN0YXRfZWNkZihnZW9tID0gInN0ZXAiLCBjb2xvcj00KSArCiAgeGxpbShjKDAsMSkpKwogIGxhYnMoeD0iT2JzZXJ2ZWQgUElUIHZhbHVlcyIsIHk9IkVDREYiKSsKICBhbm5vdGF0ZShnZW9tPSJzZWdtZW50Iix4PTAseT0wLHhlbmQ9MSx5ZW5kPTEpCmBgYAoKUmVwZWF0IHRoZSBQSVQgY2hlY2tpbmcgYWZ0ZXIgcmVtb3ZpbmcgdHdvICJvdXRsaWVycyIKCmBgYHtyIH0KeSA8LSB5W3k+MF0KYGBgCgpTdWZmaWNpZW50IHN0YXRpc3RpY3MKCmBgYHtyIH0KbiA8LSBsZW5ndGgoeSkKcyA8LSBzZCh5KQpteSA8LSBtZWFuKHkpCmBgYAoKVGFpbCBhcmVhIHByb2JhYmlsaXRpZXMgb2YgbWFyZ2luYWwgcHJlZGljdGl2ZSBkaXN0cmlidXRpb25zLApha2EgcHJvYmFiaWxpdHkgaW50ZWdyYWwgdHJhbnNmb3JtYXRpb24gKFBJVCkKCmBgYHtyIH0KVHkgPC0gZGF0YS5mcmFtZSh4ID0gcHQoKHkgLSBteSkvKHNxcnQoMSsxL24pKnMpLCBuLTEpKQpgYGAKClBsb3QgaGlzdG9ncmFtIG9mIFBJVCB2YWx1ZXMuIElkZWFsbHkgaGlzdG9ncmFtIHNob3VsZCBiZSBjbG9zZSB0byB1bmlmb3JtLgoKYGBge3IgfQp0aXRsZTEgPC0gJ0xpZ2h0IHNwZWVkIGV4YW1wbGUKZGlzdHJpYnV0aW9uIG9mIHByZWRpY3RpdmUgZGlzdHJpYnV0aW9uIHRhaWwtdmFsdWVzJwpnZ3Bsb3QoZGF0YSA9IFR5KSArCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHggPSB4KSwgZmlsbCA9ICdzdGVlbGJsdWUnLAogICAgICAgICAgICAgICAgIGNvbG9yID0gJ2JsYWNrJywgYmlud2lkdGggPSAwLjA1KSArCiAgY29vcmRfY2FydGVzaWFuKHhsaW0gPSBjKDAsIDEpKSArCiAgbGFicyh4ID0gVGVYKHIiKCRcdGV4dGl0e3B9KFx0ZXh0aXR7eX1ee1x0ZXh0cm17cmVwfX1fe1x0ZXh0aXR7aX19IDwgXHRleHRpdHt5X2l9IHwgXHRleHRpdHt5fSkkKSIpLAogICAgICAgeSA9ICcnLCB0aXRsZSA9IHRpdGxlMSkgKwogIHNjYWxlX3lfY29udGludW91cyhicmVha3M9TlVMTCkKCiMqIFBsb3QgRUNERiBvZiBQSVQgdmFsdWVzLiBJZGVhbGx5IEVDREYgc2hvdWxkIGJlIGNsb3NlIHRvIGRpYWdvbmFsIGxpbmUKZ2dwbG90KGRhdGE9ZGF0YS5mcmFtZSh4PVR5KSwgYWVzKHgpKSArCiAgc3RhdF9lY2RmKGdlb20gPSAic3RlcCIsIGNvbG9yPTQpICsKICB4bGltKGMoMCwxKSkrCiAgbGFicyh4PSJPYnNlcnZlZCBQSVQgdmFsdWVzIiwgeT0iRUNERiIpKwogIGFubm90YXRlKGdlb209InNlZ21lbnQiLHg9MCx5PTAseGVuZD0xLHllbmQ9MSkKYGBgCgo=