Notebook for Assignment 9

Author

Aki Vehtari et al.

1 General information

This assignment relates to Lectures 9-10 and Chapter 9 of BDA3.

We recommend using JupyterHub (which has all the needed packages pre-installed).

Reading instructions:
  • For background on Bayes-R2.
  • The reading instructions for BDA3 Chapter 9 (decision analysis).

General Instructions for Answering the Assignment Questions
  • The questions below are exact copies of the text found in the MyCourses quiz; this notebook is a place where you can store your notes and code.
  • We recommend opening these notebooks in the Aalto JupyterHub; see how to use R and RStudio remotely.
  • For code inspiration, have a look at the BDA R Demos and the specific Assignment code notebooks.
  • Recommended additional self-study exercises for each chapter in BDA3 are listed on the course web page. These will help you gain a deeper understanding of the topic.
  • Common questions and answers regarding installation and technical problems can be found in Frequently Asked Questions (FAQ).
  • Deadlines for all assignments can be found on the course web page and in MyCourses.
  • You are allowed to discuss assignments with your friends, but it is not allowed to copy solutions directly from other students or from the internet.
  • Do not share your answers publicly.
  • Do not copy answers from the internet or from previous years. We compare your answers to those from previous years and to those from other students this year.
  • Use of AI is allowed on the course, but most of the work needs to be done by the student, and you need to report whether you used AI and how you used it (see points 5 and 6 in the Aalto guidelines for use of AI in teaching).
  • All suspected plagiarism will be reported and investigated. See more about the Aalto University Code of Academic Integrity and Handling Violations Thereof.
  • If you have any suggestions or improvements to the course material, please post in the course chat feedback channel, create an issue, or submit a pull request to the public repository!
if (!require(brms)) {
    install.packages("brms")
    library(brms)
}
Loading required package: brms
Loading required package: Rcpp
Loading 'brms' package (version 2.22.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

Attaching package: 'brms'
The following object is masked from 'package:stats':

    ar
if(!require(cmdstanr)){
    install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
    library(cmdstanr)
}
Loading required package: cmdstanr
This is cmdstanr version 0.8.1
- CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
- CmdStan path: /home/runner/.cmdstan/cmdstan-2.35.0
- CmdStan version: 2.35.0
cmdstan_installed <- function(){
  res <- try(out <- cmdstanr::cmdstan_path(), silent = TRUE)
  !inherits(res, "try-error")
}
if(!cmdstan_installed()){
    install_cmdstan()
}

if(!require(ggplot2)){
    install.packages("ggplot2")
    library(ggplot2)
}
Loading required package: ggplot2
if(!require(bayesplot)){
    install.packages("bayesplot")
    library(bayesplot)
}
Loading required package: bayesplot
This is bayesplot version 1.11.1
- Online documentation and vignettes at mc-stan.org/bayesplot
- bayesplot theme set to bayesplot::theme_default()
   * Does _not_ affect other ggplot2 plots
   * See ?bayesplot_theme_set for details on theme setting

Attaching package: 'bayesplot'
The following object is masked from 'package:brms':

    rhat
if(!require(dplyr)){
    install.packages("dplyr")
    library(dplyr)
}
Loading required package: dplyr

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
if(!require(tidyr)){
    install.packages("tidyr")
    library(tidyr)
}
Loading required package: tidyr
if(!require(matrixStats)){
    install.packages("matrixStats")
    library(matrixStats)
}
Loading required package: matrixStats

Attaching package: 'matrixStats'
The following object is masked from 'package:dplyr':

    count
if(!require(ggdist)){
    install.packages("ggdist")
    library(ggdist)
}
Loading required package: ggdist

Attaching package: 'ggdist'
The following objects are masked from 'package:brms':

    dstudent_t, pstudent_t, qstudent_t, rstudent_t
if(!require(tinytable)){
    install.packages("tinytable")
    library(tinytable)
}
Loading required package: tinytable
if(!require(posterior)){
    install.packages("posterior")
    library(posterior)
}
Loading required package: posterior
This is posterior version 1.6.0

Attaching package: 'posterior'
The following object is masked from 'package:bayesplot':

    rhat
The following objects are masked from 'package:stats':

    mad, sd, var
The following objects are masked from 'package:base':

    %in%, match
if(!require(patchwork)){
    install.packages("patchwork")
    library(patchwork)
}
Loading required package: patchwork
if(!require(mvtnorm)){
    install.packages("mvtnorm")
    library(mvtnorm)
}
Loading required package: mvtnorm
theme_set(bayesplot::theme_default(base_family = "sans"))

2 R2

Compute and plot the prior predictive R2 distributions for the normal model with standard normal priors and for the normal model with the R2 prior. Modify the code below.
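In the code below, each prior predictive draw of the Bayesian R^2 is computed as Var(mu) / (Var(mu) + sigma^2), where mu = X beta is the linear predictor and sigma^2 is the residual variance drawn from its prior.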

sims <- 10000
p <- 26
n <- 400
X <- rmvnorm(n,mean = array(0,p),sigma = diag(array(1,p)))


## Prior Predictive Graphs: Normal
ppR2_gausprior<-numeric()

for (i in 1:sims) {
  sigma2 <- rexp(1,rate=1/3)^2
  beta <- rnorm(p)
  mu <- X%*%as.vector(beta)
  muvar <- var(mu)
  ppR2_gausprior[i] <- muvar/(muvar+sigma2)
}
ggplot()+geom_histogram(aes(x=ppR2_gausprior), breaks=seq(0,1,length.out=50)) +
  xlim(c(0,1)) +
  scale_y_continuous(breaks=NULL) +
  labs(x="Prior predictive Bayesian R^2",y="")

## Prior Predictive Graphs: R^2 prior
ppR2_r2prior<-numeric()
mu_R2 <- 0.5
scale_R2 <- 2

for (i in 1:sims) {
  sigma2 <- rexp(1,rate=1/3)^2
  R2 <- rbeta(1,(mu_R2*scale_R2),((1-mu_R2)*scale_R2))
  tau2 <- R2/(1-R2)
  psi <- rdirichlet(1,rep(1,p))
  beta <- rnorm(p) * sqrt(tau2*psi*sigma2)
  mu <- X%*%as.vector(beta)
  muvar <- var(mu)
  ppR2_r2prior[i] <- muvar/(muvar+sigma2)
}
ggplot()+geom_histogram(aes(x=ppR2_r2prior), breaks=seq(0,1,length.out=50)) +
  xlim(c(0,1)) +
  scale_y_continuous(breaks=NULL) +
  labs(x="Prior predictive Bayesian R^2",y="")

3 Portuguese Student Data

3.1 Data Prep

# Load the data
student <- read.csv(url('https://raw.githubusercontent.com/avehtari/ROS-Examples/master/Student/data/student-merged.csv'))

# Predictors to be used
predictors <- c("school","sex","age","address","famsize","Pstatus","Medu","Fedu","traveltime","studytime","failures","schoolsup","famsup","paid","activities", "nursery", "higher", "internet", "romantic","famrel","freetime","goout","Dalc","Walc","health","absences")
p <- length(predictors)

# To reduce variability, use the median grade over the three exams for
# each topic; only students with non-zero grades are selected
grades <- c("G1mat","G2mat","G3mat","G1por","G2por","G3por")
student <- student %>%
  mutate(across(matches("G[1-3]..."), ~na_if(.,0))) %>%
  mutate(Gmat = rowMedians(as.matrix(select(.,matches("G.mat"))), na.rm=TRUE),
         Gpor = rowMedians(as.matrix(select(.,matches("G.por"))), na.rm=TRUE))
student_Gmat <- subset(student, is.finite(Gmat), select=c("Gmat",predictors))
student_Gmat <- student_Gmat[is.finite(rowMeans(student_Gmat)),]
student_Gpor <- subset(student, is.finite(Gpor), select=c("Gpor",predictors))
(nmat <- nrow(student_Gmat))
[1] 382
# Look at the data
head(student_Gmat) |> tt()
Gmat school sex age address famsize Pstatus Medu Fedu traveltime studytime failures schoolsup famsup paid activities nursery higher internet romantic famrel freetime goout Dalc Walc health absences
10 0 0 15 0 0 1 1 1 2 4 1 1 1 1 1 1 1 1 0 3 1 2 1 1 1 2
6 0 0 15 0 0 1 1 1 1 2 2 1 1 0 0 0 1 1 1 3 3 4 2 4 5 2
13 0 0 15 0 0 1 2 2 1 1 0 1 1 1 1 1 1 0 0 4 3 1 1 1 2 8
9 0 0 15 0 0 1 2 4 1 3 0 1 1 1 1 1 1 1 0 4 3 2 1 1 5 2
10 0 0 15 0 0 1 3 3 2 3 2 0 1 1 1 1 1 1 1 4 2 1 2 3 3 8
12 0 0 15 0 0 1 3 4 1 3 0 1 1 1 1 1 1 1 0 4 3 2 1 1 5 2
# Visualise the distributions of median math scores
p1 <- ggplot(student_Gmat, aes(x=Gmat)) + geom_dots() + labs(x='Median math exam score')
p1
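The Portuguese scores prepared above can be visualised the same way (a small sketch; p2 is a new helper name):

# Visualise the distribution of median Portuguese scores
p2 <- ggplot(student_Gpor, aes(x=Gpor)) + geom_dots() + labs(x='Median Portuguese exam score')
p2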

# Transform the non-binary predictors to have standard deviation 1
studentstd_Gmat <- student_Gmat
Gmatbin<-apply(student_Gmat[,predictors], 2, function(x) {length(unique(x))==2})
studentstd_Gmat[,predictors[!Gmatbin]] <-scale(student_Gmat[,predictors[!Gmatbin]])
studentstd_Gpor <- student_Gpor
Gporbin<-apply(student_Gpor[,predictors], 2, function(x) {length(unique(x))==2})
studentstd_Gpor[,predictors[!Gporbin]] <-scale(student_Gpor[,predictors[!Gporbin]])
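As a quick sanity check (optional), the scaled non-binary predictors should now have standard deviation 1:

# Verify the standardisation: non-binary predictors should have sd of 1
round(sapply(studentstd_Gmat[, predictors[!Gmatbin]], sd), 2)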

# Set Seed for reproducibility
SEED <- 2132

3.2 Normal Model

Estimate model

# Estimate Model
fit1 <- brm(Gmat ~ ., data = studentstd_Gmat,
            normalize = FALSE,
            seed = SEED,
            backend = "cmdstanr")
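Before interpreting the posterior, it is worth a quick convergence check. A minimal sketch using the posterior package loaded above:

# Quick convergence check: Rhat and effective sample sizes for the coefficients
as_draws_df(fit1, variable = paste0('b_', predictors)) |>
  summarise_draws("rhat", "ess_bulk", "ess_tail") |>
  tt()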

Visualise posteriors

# Plot marginal posteriors of the coefficients
drawsmu <- as_draws_df(fit1, variable=paste0('b_',predictors)) |>
  set_variables(predictors)
p <- mcmc_areas(drawsmu,
                prob_outer=0.98, area_method = "scaled height") +
  xlim(c(-3,3))
p <- p + scale_y_discrete(limits = rev(levels(p$data$parameter)))
p

Now, find the prior predictive R2

# Predictor matrix (the first column of studentstd_Gmat is the outcome)
X <- studentstd_Gmat[, predictors]

ppR2_1<-numeric()
sims <- 4000

for (i in 1:sims) {
  sigma2 <- rstudent_t(1,3,0,3)^2
  beta <- rnorm(length(predictors))
  mu <- as.matrix(X)%*%as.vector(beta)
  muvar <- var(mu)
  ppR2_1[i] <- muvar/(muvar+sigma2)
}
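A quick numeric summary (optional): with 26 predictors, most scaled to unit variance, and standard normal coefficient priors, the prior predictive R^2 tends to concentrate at large values.

# Summarise the simulated prior predictive R^2 draws
quantile(ppR2_1, probs = c(0.05, 0.5, 0.95))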

Plot prior predictive R2 vs posterior R2

data <- data.frame(Prior=ppR2_1,Posterior=bayes_R2(fit1, summary=FALSE)) 
names(data) <- c("Prior","Posterior")
mcmc_hist(data,
          breaks=seq(0,1,length.out=100),
          facet_args = list(nrow = 2)) +
  facet_text(size = 13) +
  scale_x_continuous(limits = c(0,1), expand = c(0, 0),
                     labels = c("0","0.25","0.5","0.75","1")) +
  theme(axis.line.y = element_blank()) +
  xlab("Bayesian R^2")

Bayes-R2 and LOO-R2

bayes_R2(fit1) |> as.data.frame() |> tt()
loo_R2(fit1) |> as.data.frame() |> tt()

3.3 R2 Model

Estimate the model; amend the code below

fit2 <- brm(Gmat ~ ., data = studentstd_Gmat,
            seed = SEED,
            normalize = FALSE,
            prior=c(prior(R2D2(mean_R2 = 0.5, prec_R2 = 1, cons_D2 = 1,
                               autoscale = TRUE),class=b)),
            backend = "cmdstanr")
Start sampling
Running MCMC with 4 sequential chains...

Chain 1 finished in 0.7 seconds.
Chain 2 finished in 0.6 seconds.
Chain 3 finished in 0.7 seconds.
Chain 4 finished in 0.7 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.7 seconds.
Total execution time: 3.1 seconds.

Visualise posteriors

# Plot marginal posteriors
draws2 <- as_draws_df(fit2, variable=paste0('b_',predictors)) |>
  set_variables(predictors)
p <- mcmc_areas(draws2,
                prob_outer=0.98, area_method = "scaled height") +
  xlim(c(-3,3))
Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
p <- p + scale_y_discrete(limits = rev(levels(p$data$parameter)))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
p

Find the prior predictive R2. This should match the prior you set on R2 if all predictors are scaled to have unit variance. Since the binary predictors were not standardised, the prior predictive distribution may look slightly different; let's compute it for illustration.

ppR2_2<-numeric()
sims <- 4000

for (i in 1:sims) {
  sigma2 <- rstudent_t(1,3,0,3)^2
  R2 <- rbeta(1,1,2)
  tau2 <- R2/(1-R2)
  psi <- as.numeric(rdirichlet(1,rep(1,dim(X)[2])))
  beta <- rnorm(dim(X)[2])*sqrt(sigma2*tau2*psi)
  mu <- as.matrix(X)%*%as.vector(beta)
  muvar <- var(mu)
  ppR2_2[i] <- muvar/(muvar+sigma2)
}
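To illustrate the point above (a sketch), overlay the Beta(1, 2) density drawn in the loop above on the simulated prior predictive R^2; with all predictors at unit variance the two should roughly agree.

# Compare simulated prior predictive R^2 to the Beta(1,2) prior density
ggplot() +
  geom_histogram(aes(x=ppR2_2, y=after_stat(density)),
                 breaks=seq(0,1,length.out=50)) +
  geom_function(fun = dbeta, args = list(shape1 = 1, shape2 = 2),
                colour = "red") +
  labs(x="Prior predictive Bayesian R^2", y="density")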

Now plot the prior predictive R2 against the posterior R2.

# Prior vs posterior R2
data <- data.frame(Prior=ppR2_2,Posterior=bayes_R2(fit2, summary=FALSE)) 
names(data) <- c("Prior","Posterior")
mcmc_hist(data,
          breaks=seq(0,1,length.out=100),
          facet_args = list(nrow = 2)) +
  facet_text(size = 13) +
  scale_x_continuous(limits = c(0,1), expand = c(0, 0),
                     labels = c("0","0.25","0.5","0.75","1")) +
  theme(axis.line.y = element_blank()) +
  xlab("Bayesian R^2")

Summaries of R2 and LOO-R2

# Bayes-R2 and LOO-R2
bayes_R2(fit2) |> as.data.frame() |> tt()
Estimate Est.Error Q2.5 Q97.5
0.2527685 0.03639153 0.180655 0.3217405
loo_R2(fit2) |> as.data.frame() |> tt()
Estimate Est.Error Q2.5 Q97.5
0.2144079 0.03301474 0.1466264 0.2781779
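Once fit1 also compiles and samples, the posterior R2 distributions of the two models can be compared directly. A sketch, assuming both fits exist:

# Compare posterior Bayes-R2 draws from the two models
R2s <- data.frame(Normal = as.numeric(bayes_R2(fit1, summary=FALSE)),
                  R2D2 = as.numeric(bayes_R2(fit2, summary=FALSE)))
mcmc_hist(R2s, breaks=seq(0,1,length.out=100),
          facet_args = list(nrow = 2)) +
  xlab("Bayesian R^2")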