Linear regression using different software options. See Appendix B in Regression and Other Stories.
library("arm")
library("rstanarm")
library("brms")N <- 100
b <- 1:3
x1 <- rnorm(N)
x2 <- rnorm(N)
X <- cbind(rep(1,N), x1, x2)
sigma <- 2
y <- X %*% b + rnorm(N, 0, sigma)
dat <- data.frame(y, x1, x2)fit1 <- lm(y ~ x1 + x2, data = dat)
display(fit1)lm(formula = y ~ x1 + x2, data = dat)
            coef.est coef.se
(Intercept) 1.13     0.21   
x1          1.95     0.20   
x2          2.87     0.21   
---
n = 100, k = 3
residual sd = 2.08, R-Squared = 0.74b_hat <- coef(fit1)
b_se <- se.coef(fit1)
print(cbind(b_hat, b_se), digits=3)            b_hat  b_se
(Intercept)  1.13 0.208
x1           1.95 0.195
x2           2.87 0.208fit2 <- lm(y ~ X)
display(fit2)lm(formula = y ~ X)
            coef.est coef.se
(Intercept) 1.13     0.21   
Xx1         1.95     0.20   
Xx2         2.87     0.21   
---
n = 100, k = 3
residual sd = 2.08, R-Squared = 0.74fit3 <- stan_glm(y ~ x1 + x2, data = dat, refresh = 0)
print(fit3, digits=2)stan_glm
 family:       gaussian [identity]
 formula:      y ~ x1 + x2
 observations: 100
 predictors:   3
------
            Median MAD_SD
(Intercept) 1.13   0.22  
x1          1.94   0.18  
x2          2.86   0.21  
Auxiliary parameter(s):
      Median MAD_SD
sigma 2.09   0.15  
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanregfit3 <- stan_glm(y ~ x1 + x2, data = dat, refresh = 0)
print(fit3, digits=2)stan_glm
 family:       gaussian [identity]
 formula:      y ~ x1 + x2
 observations: 100
 predictors:   3
------
            Median MAD_SD
(Intercept) 1.13   0.21  
x1          1.94   0.19  
x2          2.86   0.21  
Auxiliary parameter(s):
      Median MAD_SD
sigma 2.09   0.16  
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanregb_hat <- coef(fit3)
b_se <- se(fit3)
print(cbind(b_hat, b_se), digits=3)            b_hat  b_se
(Intercept)  1.13 0.207
x1           1.94 0.190
x2           2.86 0.207This will take longer as the model is not pre-compiled as in stan_glm.
fit4 <- brm(y ~ x1 + x2, data = dat, refresh = 0)
print(fit4, digits=2) Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: y ~ x1 + x2 
   Data: dat (Number of observations: 100) 
Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup samples = 4000
Population-Level Effects: 
          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept     1.13      0.21     0.71     1.52 1.00     4538     3166
x1            1.95      0.19     1.58     2.32 1.00     4503     3322
x2            2.86      0.21     2.46     3.26 1.00     4496     3012
Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     2.10      0.15     1.83     2.41 1.00     4244     3167
Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).Stan code generated by brms can be used to learn Stan or get a starting point for a model which is not yet implemented in rstanarm or brms
stancode(fit4)// generated with brms 2.12.0
functions {
}
data {
  int<lower=1> N;  // number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  int Kc = K - 1;
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
}
parameters {
  vector[Kc] b;  // population-level effects
  real Intercept;  // temporary intercept for centered predictors
  real<lower=0> sigma;  // residual SD
}
transformed parameters {
}
model {
  // priors including all constants
  target += student_t_lpdf(Intercept | 3, 2, 10);
  target += student_t_lpdf(sigma | 3, 0, 10)
    - 1 * student_t_lccdf(0 | 3, 0, 10);
  // likelihood including all constants
  if (!prior_only) {
    target += normal_id_glm_lpdf(Y | Xc, Intercept, b, sigma);
  }
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
}