Linear regression using different software options. See Appendix B in Regression and Other Stories.
library("arm")
library("rstanarm")
library("brms")
Simulate fake data:
N <- 100
b <- 1:3                       # true coefficients: intercept 1, slopes 2 and 3
x1 <- rnorm(N)
x2 <- rnorm(N)
X <- cbind(rep(1,N), x1, x2)   # design matrix including the constant term
sigma <- 2
y <- X %*% b + rnorm(N, 0, sigma)
dat <- data.frame(y, x1, x2)
Fit and display using lm; display() comes from the arm package:
fit1 <- lm(y ~ x1 + x2, data = dat)
display(fit1)
lm(formula = y ~ x1 + x2, data = dat)
coef.est coef.se
(Intercept) 1.13 0.21
x1 1.95 0.20
x2 2.87 0.21
---
n = 100, k = 3
residual sd = 2.08, R-Squared = 0.74
Extract the coefficient estimates and standard errors:
b_hat <- coef(fit1)
b_se <- se.coef(fit1)
print(cbind(b_hat, b_se), digits=3)
b_hat b_se
(Intercept) 1.13 0.208
x1 1.95 0.195
x2 2.87 0.208
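Approximate 95% intervals can be formed from these as estimate ± 2 standard errors (a quick sketch of the normal approximation; confint(fit1) gives the exact t-based intervals):

print(cbind(lower = b_hat - 2*b_se, upper = b_hat + 2*b_se), digits=3)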
lm also accepts a matrix of predictors. Note that the constant column in X is redundant given the intercept that lm adds itself, so no coefficient is estimated for it:
fit2 <- lm(y ~ X)
display(fit2)
lm(formula = y ~ X)
coef.est coef.se
(Intercept) 1.13 0.21
Xx1 1.95 0.20
Xx2 2.87 0.21
---
n = 100, k = 3
residual sd = 2.08, R-Squared = 0.74
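For comparison, the least-squares estimate can also be computed directly from the design matrix via the normal equations (a minimal sketch; the name b_ols is illustrative, and lm itself uses a numerically more stable QR decomposition):

b_ols <- solve(t(X) %*% X, t(X) %*% y)   # (X'X)^{-1} X'y
print(drop(b_ols), digits=3)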
Fit using stan_glm from rstanarm; refresh=0 suppresses the sampler's progress output:
fit3 <- stan_glm(y ~ x1 + x2, data = dat, refresh = 0)
print(fit3, digits=2)
stan_glm
family: gaussian [identity]
formula: y ~ x1 + x2
observations: 100
predictors: 3
------
Median MAD_SD
(Intercept) 1.13 0.22
x1 1.94 0.18
x2 2.86 0.21
Auxiliary parameter(s):
Median MAD_SD
sigma 2.09 0.15
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
Running stan_glm again gives slightly different estimates, as the simulation-based results vary from run to run due to Monte Carlo error:
fit3 <- stan_glm(y ~ x1 + x2, data = dat, refresh = 0)
print(fit3, digits=2)
stan_glm
family: gaussian [identity]
formula: y ~ x1 + x2
observations: 100
predictors: 3
------
Median MAD_SD
(Intercept) 1.13 0.21
x1 1.94 0.19
x2 2.86 0.21
Auxiliary parameter(s):
Median MAD_SD
sigma 2.09 0.16
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
Extract the posterior median estimates and MAD_SD uncertainties, analogously to coef() and se.coef() for lm:
b_hat <- coef(fit3)
b_se <- se(fit3)
print(cbind(b_hat, b_se), digits=3)
b_hat b_se
(Intercept) 1.13 0.207
x1 1.94 0.190
x2 2.86 0.207
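The full set of posterior draws is also available, and rstanarm can report posterior intervals directly (a minimal sketch):

sims <- as.matrix(fit3)               # one row per posterior draw, one column per parameter
posterior_interval(fit3, prob = 0.9)  # central 90% posterior intervals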
Fit using brm from brms. This will take longer because, unlike stan_glm, the model is not pre-compiled and has to be compiled before sampling.
fit4 <- brm(y ~ x1 + x2, data = dat, refresh = 0)
print(fit4, digits=2)
Family: gaussian
Links: mu = identity; sigma = identity
Formula: y ~ x1 + x2
Data: dat (Number of observations: 100)
Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup samples = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 1.13 0.21 0.71 1.52 1.00 4538 3166
x1 1.95 0.19 1.58 2.32 1.00 4503 3322
x2 2.86 0.21 2.46 3.26 1.00 4496 3012
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 2.10 0.15 1.83 2.41 1.00 4244 3167
Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
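Estimates can be extracted from the brms fit as well (a minimal sketch; fixef() returns the population-level effects, and posterior_summary() summarizes all parameters including sigma):

fixef(fit4)
posterior_summary(fit4)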
The Stan code generated by brms can be used to learn Stan, or as a starting point for a model that is not yet implemented in rstanarm or brms:
stancode(fit4)
// generated with brms 2.12.0
functions {
}
data {
  int<lower=1> N;  // number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  int Kc = K - 1;
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
}
parameters {
  vector[Kc] b;  // population-level effects
  real Intercept;  // temporary intercept for centered predictors
  real<lower=0> sigma;  // residual SD
}
transformed parameters {
}
model {
  // priors including all constants
  target += student_t_lpdf(Intercept | 3, 2, 10);
  target += student_t_lpdf(sigma | 3, 0, 10)
    - 1 * student_t_lccdf(0 | 3, 0, 10);
  // likelihood including all constants
  if (!prior_only) {
    target += normal_id_glm_lpdf(Y | Xc, Intercept, b, sigma);
  }
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
}
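The generated code can also be run directly with rstan. A sketch, assuming rstan is installed; make_standata() from brms builds the data list in the format the code expects, and fit5 is an illustrative name:

library("rstan")
standata <- make_standata(y ~ x1 + x2, data = dat)
fit5 <- stan(model_code = stancode(fit4), data = standata, refresh = 0)
print(fit5, pars = c("b_Intercept", "b", "sigma"), digits = 2)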