Demonstrate how computation time scales with bigger data sets. See Chapter 22 in Regression and Other Stories.
library(arm)
library(rstanarm)
options(mc.cores = parallel::detectCores())
library(tictoc)
SEED <- 1656
set.seed(SEED)
n <- 1e5
k <- 100
x <- rnorm(n)
xn <- matrix(rnorm(n*(k-1)),nrow=n)
a <- 2
b <- 3
sigma <- 10
y <- a + b*x + sigma*rnorm(n)
fake <- data.frame(x, xn, y)
tic()
fit1 <- lm(y ~ ., data=fake)
toc()
1.722 sec elapsed
#display(fit1)
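To see the scaling in the timings below more directly, one could time the same lm fit at a few data sizes. The helper below is an illustrative sketch, not part of the original example; the function name time_lm and the grid of sizes are made up, and the data are simulated exactly as above.
time_lm <- function(n, k=100, a=2, b=3, sigma=10) {
  # simulate fake data as above and return the elapsed time of the lm fit
  x <- rnorm(n)
  xn <- matrix(rnorm(n*(k-1)), nrow=n)
  y <- a + b*x + sigma*rnorm(n)
  fake <- data.frame(x, xn, y)
  system.time(lm(y ~ ., data=fake))["elapsed"]
}
sapply(c(1e4, 5e4, 1e5), time_lm)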
stan_glm is fast for linear regression when n > k and k is small or moderate, as it can use an OLS-based computational trick.
tic()
fit2 <- stan_glm(y ~ ., data=fake, mean_PPD=FALSE,
                 refresh=500, seed=SEED, cores=1)
toc()
15.704 sec elapsed
#print(fit2, digits=2)
Optimization with a normal approximation at the mode, combined with Pareto smoothed importance resampling, provides coefficient standard errors and also a diagnostic for whether the normal approximation at the mode is appropriate.
tic()
fit3 <- stan_glm(y ~ ., data=fake, algorithm='optimizing', init=0)
toc()
10.287 sec elapsed
#print(fit3, digits=2)
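As a quick sanity check (not in the original code), the approximate-posterior medians and MAD_SDs from fit3 should be close to the lm point estimates and standard errors from fit1; the rows selected below are just the intercept, x, and the first noise predictor.
# hedged check: compare lm and stan_glm optimizing summaries for a few coefficients
round(cbind(lm_est   = coef(fit1)[1:3],
            lm_se    = summary(fit1)$coefficients[1:3, "Std. Error"],
            stan_med = coef(fit3)[1:3],
            stan_mad = se(fit3)[1:3]), 2)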
SEED <- 1655
set.seed(SEED)
n <- 1e4
k <- 100
x <- rnorm(n)
xn <- matrix(rnorm(n*(k-1)),nrow=n)
a <- 2
b <- 3
sigma <- 1
y <- as.numeric(a + b*x + sigma*rnorm(n) > 0)
fake <- data.frame(x, xn, y)
tic()
fit1 <- glm(y ~ ., data=fake, family=binomial())
toc()
1.118 sec elapsed
#display(fit1)
tic()
fit2 <- stan_glm(y ~ ., data=fake, family=binomial(), mean_PPD=FALSE,
                 init_r=0.1, seed=SEED)
toc()
117.87 sec elapsed
#print(fit2, digits=2)
As above, optimization with a normal approximation at the mode and Pareto smoothed importance resampling provides coefficient standard errors and a diagnostic for whether the normal approximation at the mode is appropriate.
tic()
fit3 <- stan_glm(y ~ ., data=fake, family=binomial(),
                 algorithm='optimizing', init=0, draws=16000, keep_every=4)
toc()
22.869 sec elapsed
#print(fit3, digits=2)
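The draws and keep_every arguments above control how many importance-resampled draws are retained. A quick illustrative check (not in the original) is to look at the size of the returned draws matrix and the approximate posterior intervals.
# hedged check: with draws=16000 and keep_every=4 the approximate posterior
# should contain 16000/4 = 4000 draws (assuming keep_every thins the resampled draws)
dim(as.matrix(fit3))
posterior_interval(fit3, prob=0.9, pars=c("(Intercept)", "x"))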
SEED <- 1655
set.seed(SEED)
n <- 1e5
k <- 100
x <- rnorm(n)
xn <- matrix(rnorm(n*(k-1)),nrow=n)
a <- 2
b <- 3
sigma <- 1
y <- as.numeric(a + b*x + sigma*rnorm(n) > 0)
fake <- data.frame(x, xn, y)
tic()
fit1 <- glm(y ~ ., data=fake, family=binomial())
toc()
12.729 sec elapsed
#display(fit1)
Again, optimization with a normal approximation at the mode and Pareto smoothed importance resampling provides coefficient standard errors and a diagnostic for whether the normal approximation at the mode is appropriate.
tic()
fit3 <- stan_glm(y ~ ., data=fake, family=binomial(),
                 algorithm='optimizing', init=0)
toc()
21.868 sec elapsed
summary(fit3, digits=2)
Model Info:
function: stan_glm
family: binomial [logit]
formula: y ~ .
algorithm: optimizing
priors: see help('prior_summary')
observations: 100000
predictors: 101
Estimates:
Median MAD_SD 10% 50% 90%
(Intercept) 3.61 0.03 3.58 3.61 3.65
x 5.45 0.05 5.39 5.45 5.50
X1 0.00 0.01 -0.02 0.00 0.01
X2 0.04 0.01 0.03 0.04 0.06
X3 0.00 0.01 -0.02 0.00 0.01
X4 0.00 0.01 -0.02 0.00 0.01
X5 0.02 0.01 0.00 0.02 0.03
X6 0.01 0.01 0.00 0.01 0.03
X7 -0.01 0.01 -0.03 -0.01 0.01
X8 0.01 0.01 -0.01 0.01 0.02
X9 0.00 0.01 -0.01 0.00 0.02
X10 0.00 0.01 -0.02 0.00 0.02
X11 0.01 0.01 -0.01 0.01 0.02
X12 0.00 0.01 -0.02 0.00 0.01
X13 -0.01 0.01 -0.03 -0.01 0.01
X14 -0.01 0.01 -0.02 -0.01 0.01
X15 -0.02 0.01 -0.04 -0.02 0.00
X16 0.01 0.01 -0.01 0.01 0.03
X17 0.02 0.01 0.00 0.02 0.04
X18 -0.01 0.01 -0.02 -0.01 0.01
X19 0.02 0.01 0.00 0.02 0.03
X20 -0.01 0.01 -0.02 -0.01 0.01
X21 -0.03 0.01 -0.05 -0.03 -0.01
X22 -0.03 0.01 -0.05 -0.03 -0.01
X23 0.01 0.01 -0.01 0.01 0.03
X24 0.00 0.01 -0.02 0.00 0.01
X25 0.01 0.01 -0.01 0.01 0.02
X26 0.00 0.01 -0.02 0.00 0.01
X27 -0.01 0.01 -0.02 -0.01 0.01
X28 0.01 0.01 -0.01 0.01 0.02
X29 -0.01 0.01 -0.02 -0.01 0.01
X30 -0.01 0.01 -0.03 -0.01 0.01
X31 0.00 0.01 -0.02 0.00 0.02
X32 -0.02 0.01 -0.03 -0.02 0.00
X33 0.00 0.01 -0.02 0.00 0.01
X34 0.01 0.01 -0.01 0.01 0.03
X35 0.00 0.01 -0.02 0.00 0.02
X36 -0.01 0.02 -0.03 -0.01 0.00
X37 0.01 0.01 -0.01 0.01 0.02
X38 0.00 0.01 -0.02 0.00 0.02
X39 0.01 0.01 0.00 0.01 0.03
X40 0.00 0.01 -0.01 0.00 0.02
X41 -0.03 0.01 -0.05 -0.03 -0.02
X42 -0.02 0.01 -0.03 -0.02 0.00
X43 0.00 0.01 -0.02 0.00 0.01
X44 0.00 0.01 -0.01 0.00 0.02
X45 0.00 0.01 -0.02 0.00 0.02
X46 -0.02 0.01 -0.03 -0.02 0.00
X47 0.00 0.01 -0.02 0.00 0.01
X48 -0.01 0.01 -0.02 -0.01 0.01
X49 0.01 0.01 -0.01 0.01 0.02
X50 0.00 0.01 -0.02 0.00 0.02
X51 0.00 0.01 -0.02 0.00 0.01
X52 0.01 0.01 -0.01 0.01 0.02
X53 0.00 0.01 -0.02 0.00 0.02
X54 0.00 0.01 -0.01 0.00 0.02
X55 0.01 0.01 -0.01 0.01 0.03
X56 -0.01 0.01 -0.03 -0.01 0.00
X57 0.00 0.01 -0.02 0.00 0.01
X58 0.01 0.01 -0.01 0.01 0.02
X59 -0.02 0.01 -0.04 -0.02 -0.01
X60 -0.01 0.01 -0.02 -0.01 0.01
X61 -0.02 0.01 -0.03 -0.02 0.00
X62 -0.01 0.01 -0.03 -0.01 0.00
X63 0.01 0.01 -0.01 0.01 0.02
X64 0.01 0.01 -0.01 0.01 0.03
X65 0.01 0.01 -0.01 0.01 0.03
X66 0.03 0.01 0.01 0.03 0.04
X67 -0.02 0.01 -0.04 -0.02 0.00
X68 0.01 0.01 0.00 0.01 0.03
X69 -0.01 0.01 -0.03 -0.01 0.01
X70 0.00 0.01 -0.02 0.00 0.01
X71 0.02 0.01 0.01 0.02 0.04
X72 0.00 0.01 -0.02 0.00 0.02
X73 -0.01 0.01 -0.03 -0.01 0.01
X74 -0.01 0.01 -0.03 -0.01 0.01
X75 -0.02 0.01 -0.03 -0.02 0.00
X76 0.01 0.01 -0.01 0.01 0.02
X77 0.00 0.01 -0.02 0.00 0.01
X78 -0.01 0.01 -0.03 -0.01 0.00
X79 0.01 0.01 -0.01 0.01