Analyse the effect of integrated pest management on reducing cockroach levels in urban apartments. See Chapter 15 in Regression and Other Stories.
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("brms")
library("loo")
library("ggplot2")
library("bayesplot")
theme_set(bayesplot::theme_default(base_family = "sans"))Set random seed for reproducability
SEED <- 3579data(roaches)
(n <- nrow(roaches))[1] 262scale the number of roaches by 100
roaches$roach100 <- roaches$roach1 / 100
head(roaches)    y roach1 treatment senior exposure2 roach100
1 153 308.00         1      0  0.800000   3.0800
2 127 331.25         1      0  0.600000   3.3125
3   7   1.67         1      0  1.000000   0.0167
4   7   3.00         1      0  1.000000   0.0300
5   0   2.00         1      0  1.142857   0.0200
6   0   0.00         1      0  1.000000   0.0000negative-binomial model is over-dispersed compared to Poisson
fit_1 <- stan_glm(y ~ roach100 + treatment + senior, family=neg_binomial_2,
  offset=log(exposure2), data=roaches, seed=SEED, refresh=0)
prior_summary(fit_1)Priors for model 'fit_1' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)
Coefficients
  Specified prior:
    ~ normal(location = [0,0,0], scale = [2.5,2.5,2.5])
  Adjusted prior:
    ~ normal(location = [0,0,0], scale = [3.32,5.10,5.42])
Auxiliary (reciprocal_dispersion)
 ~ exponential(rate = 1)
------
See help('prior_summary.stanreg') for more detailsprint(fit_1, digits=2)stan_glm
 family:       neg_binomial_2 [log]
 formula:      y ~ roach100 + treatment + senior
 observations: 262
 predictors:   4
------
            Median MAD_SD
(Intercept)  2.84   0.23 
roach100     1.31   0.25 
treatment   -0.79   0.24 
senior      -0.34   0.27 
Auxiliary parameter(s):
                      Median MAD_SD
reciprocal_dispersion 0.27   0.03  
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanregloo_1 <- loo(fit_1)yrep_1 <- posterior_predict(fit_1)
n_sims <- nrow(yrep_1)
sims_display <- sample(n_sims, 100)
ppc_1 <- ppc_dens_overlay(log10(roaches$y+1), log10(yrep_1[sims_display,]+1))+
  xlab('log10(y+1)') +
  theme(axis.line.y = element_blank())
ppc_1ppc with proportion of zero counts test statistic
ppc_stat(y=roaches$y, yrep=yrep_1, stat=function(y) mean(y==0))or
print(mean(roaches$y==0), digits=2)[1] 0.36print(mean(yrep_1==0), digits=2)[1] 0.34ppc with proportion of counts of 1 test statistic
ppc_stat(y=roaches$y, yrep=yrep_1, stat=function(y) mean(y==1))or
print(mean(roaches$y==1), digits=2)[1] 0.076print(mean(yrep_1==1), digits=2)[1] 0.089ppc with 95% quantile test statistic
ppc_stat(y=roaches$y, yrep=yrep_1, stat=function(y) quantile(y, probs=0.95))ppc with 99% quantile test statistic
ppc_stat(y=roaches$y, yrep=yrep_1, stat=function(y) quantile(y, probs=0.99))ppc with max count test statistic
ppc_stat(y=roaches$y, yrep=yrep_1, stat=max)or
print(max(roaches$y), digits=2)[1] 357print(max(yrep_1), digits=2)[1] 2232785Poisson is a special case of negative-binomial
fit_2 <- stan_glm(y ~ roach100 + treatment + senior, family=poisson,
  offset=log(exposure2), data=roaches, seed=SEED, refresh=0)
prior_summary(fit_2)Priors for model 'fit_2' 
------
Intercept (after predictors centered)
 ~ normal(location = 0, scale = 2.5)
Coefficients
  Specified prior:
    ~ normal(location = [0,0,0], scale = [2.5,2.5,2.5])
  Adjusted prior:
    ~ normal(location = [0,0,0], scale = [3.32,5.10,5.42])
------
See help('prior_summary.stanreg') for more detailsprint(fit_2, digits=2)stan_glm
 family:       poisson [log]
 formula:      y ~ roach100 + treatment + senior
 observations: 262
 predictors:   4
------
            Median MAD_SD
(Intercept)  3.09   0.02 
roach100     0.70   0.01 
treatment   -0.52   0.02 
senior      -0.38   0.03 
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanregloo_2 <- loo(fit_2)
loo_compare(loo_1, loo_2)      elpd_diff se_diff
fit_1     0.0       0.0
fit_2 -5347.3     707.9instead of y, we plot log10(y+1) to better show the differences in the shape of the predictive distribution
yrep_2 <- posterior_predict(fit_2)
n_sims <- nrow(yrep_2)
sims_display <- sample(n_sims, 100)
ppc_2 <- ppc_dens_overlay(log10(roaches$y+1), log10(yrep_2[sims_display,]+1)) +
  xlim(0,3) +
  xlab('log10(y+1)') +
  theme(axis.line.y = element_blank())
ppc_2test statistic used is the proportion of zero counts
ppc_stat(y=roaches$y, yrep=yrep_2, stat=function(y) mean(y==0))or
print(mean(roaches$y==0), digits=2)[1] 0.36print(mean(yrep_2==0), digits=2)[1] 0.00066Zero-inflated negative-binomial model is mixture of two models - logistic regression to model the proportion of extra zero counts - negative-binomial model
we switch to brms as rstanarm doesn't support zero-inflated negative-binomial model
roaches$logp1_roach1 <- log(roaches$roach1+1)
roaches$log_exposure2 <- log(roaches$exposure2)fit_3 <- brm(bf(y ~ logp1_roach1 + treatment + senior + offset(log_exposure2),
                zi ~ logp1_roach1 + treatment + senior + offset(log_exposure2)),
             family=zero_inflated_negbinomial(), data=roaches,
             prior=set_prior("normal(0,1)"), seed=SEED, refresh=500)print(fit_3) Family: zero_inflated_negbinomial 
  Links: mu = log; shape = identity; zi = logit 
Formula: y ~ logp1_roach1 + treatment + senior + offset(log_exposure2) 
         zi ~ logp1_roach1 + treatment + senior + offset(log_exposure2)
   Data: roaches (Number of observations: 262) 
Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup samples = 4000
Population-Level Effects: 
                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept           2.11      0.26     1.63     2.61 1.00     3352     2694
zi_Intercept       -1.11      0.82    -3.05     0.19 1.00     2195     2323
logp1_roach1        0.52      0.07     0.39     0.64 1.00     2813     2754
treatment          -0.79      0.22    -1.21    -0.36 1.00     3621     2968
senior              0.19      0.27    -0.33     0.72 1.00     3764     2704
zi_logp1_roach1    -1.29      0.35    -2.13    -0.78 1.00     2045     1258
zi_treatment        1.70      0.74     0.41     3.33 1.00     2376     2100
zi_senior           1.38      0.67     0.13     2.80 1.00     2634     2408
Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape     0.49      0.06     0.38     0.63 1.00     2414     2398
Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).loo_3 <- loo(fit_3)
loo_compare(loo_1, loo_3)      elpd_diff se_diff
fit_3   0.0       0.0  
fit_1 -37.0      11.6  yrep_3 <- posterior_predict(fit_3)
ppc_3 <- ppc_dens_overlay(log10(roaches$y+1), log10(yrep_3[sims_display,]+1))+
  xlab('log10(y+1)') +
  theme(axis.line.y = element_blank())
ppc_3ppc with zero count test statistic
ppc_stat(y=roaches$y, yrep=yrep_3, stat=function(y) mean(y==0))or
print(mean(roaches$y==0), digits=2)[1] 0.36print(mean(yrep_3==0), digits=2)[1] 0.37ppc with 95% quantile test statistic
ppc_stat(y=roaches$y, yrep=yrep_3, stat=function(y) quantile(y, probs=0.95))ppc with 99% quantile test statistic
ppc_stat(y=roaches$y, yrep=yrep_3, stat=function(y) quantile(y, probs=0.99))ppc with max count test statistic
ppc_stat(y=roaches$y, yrep=yrep_3, stat=max)or
print(max(roaches$y), digits=2)[1] 357print(max(yrep_3), digits=2)[1] 7475