Demonstrate how the computation time scales with bigger data. See Chapter 22 in Regression and Other Stories.


Load packages

library(arm)
library(rstanarm)
options(mc.cores = parallel::detectCores())
library(tictoc)
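
The elapsed times reported below were recorded with tictoc. As a minimal sketch (not part of the original workflow; the object name fit0 is illustrative), the package can also accumulate timings in a log, which is handy when comparing several fits:

tic("toy lm fit")
fit0 <- lm(y ~ x, data = data.frame(x = rnorm(1e4), y = rnorm(1e4)))
toc(log = TRUE, quiet = TRUE)
tic.log(format = TRUE)  # returns messages such as "toy lm fit: 0.01 sec elapsed"
tic.clearlog()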

Linear regression n=100K, p=100

Create fake data with n=100,000 and p=100

SEED <- 1656
set.seed(SEED)
n <- 1e5
k <- 100
x <- rnorm(n)
xn <- matrix(rnorm(n*(k-1)),nrow=n)
a <- 2
b <- 3
sigma <- 10
y <- a + b*x + sigma*rnorm(n)
fake <- data.frame(x, xn, y)
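
As a quick sanity check (a sketch, not in the original code), the fake data should have 100,000 rows and 101 columns, and a simple regression on x alone should recover coefficients close to the true a=2 and b=3:

dim(fake)                             # expect 100000 rows and 101 columns (x, X1..X99, y)
round(coef(lm(y ~ x, data=fake)), 2)  # intercept and slope should be near 2 and 3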

Fit using lm

tic()
fit1 <- lm(y ~ ., data=fake)
toc()
1.722 sec elapsed
#display(fit1)

Fit using stan_glm and MCMC

stan_glm is fast for linear regression when n > k and k is small or moderate, as it uses an OLS-based trick.

tic()
fit2 <- stan_glm(y ~ ., data=fake, mean_PPD=FALSE,
                 refresh=500, seed=SEED, cores=1)
toc()
15.704 sec elapsed
#print(fit2, digits=2)
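
A related option, shown here only as a hedged sketch (the object name fit2q is illustrative): stan_glm has a QR argument that applies a scaled QR decomposition to the design matrix, which can help sampling when predictors are correlated; with independent simulated predictors as here it mainly provides a timing comparison.

tic()
fit2q <- stan_glm(y ~ ., data=fake, mean_PPD=FALSE, QR=TRUE,
                  refresh=500, seed=SEED, cores=1)
toc()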

Fit using stan_glm and optimization

Using optimization with a normal approximation at the mode and Pareto smoothed importance resampling provides coefficient standard errors, and also a diagnostic for whether the normal approximation at the mode is appropriate. A quick comparison with the lm estimates is sketched after the timing below.

tic()
fit3 <- stan_glm(y ~ ., data=fake, algorithm='optimizing', init=0) 
toc()
10.287 sec elapsed
#print(fit3, digits=2)
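
As a quick check (a sketch using fit1 and fit3 from above), the posterior medians from the optimization-based approximation should agree closely with the lm estimates for this well-behaved linear model:

round(cbind(lm = coef(fit1)[1:5], optimizing = coef(fit3)[1:5]), 2)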

Logistic regression n=10K, p=100

Create fake data with n=10,000 and p=100

SEED <- 1655
set.seed(SEED)
n <- 1e4
k <- 100
x <- rnorm(n)
xn <- matrix(rnorm(n*(k-1)),nrow=n)
a <- 2
b <- 3
sigma <- 1
y <- as.numeric(a + b*x + sigma*rnorm(n) > 0)
fake <- data.frame(x, xn, y)
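
As a quick check (a sketch), the proportion of ones shows how unbalanced the simulated outcome is; with a=2 and b=3, roughly three quarters of the outcomes are 1:

round(mean(fake$y), 2)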

Fit using glm

tic()
fit1 <- glm(y ~ ., data=fake, family=binomial())
toc()
1.118 sec elapsed
#display(fit1)

Fit using stan_glm and MCMC

tic()
fit2 <- stan_glm(y ~ ., data=fake, family=binomial(), mean_PPD=FALSE,
                 init_r=0.1, seed=SEED)
toc()
117.87 sec elapsed
#print(fit2, digits=2)

Fit using stan_glm and optimization

Using optimization with a normal approximation at the mode and Pareto smoothed importance resampling provides coefficient standard errors, and also a diagnostic for whether the normal approximation at the mode is appropriate. A check of the Pareto k diagnostic is sketched after the timing below.

tic()
fit3 <- stan_glm(y ~ ., data=fake, family=binomial(),
                algorithm='optimizing', init=0, draws = 16000, keep_every = 4) 
toc()
22.869 sec elapsed
#print(fit3, digits=2)
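
To see whether the normal approximation is trustworthy here, the Pareto k diagnostic can be inspected. A minimal sketch (assuming, as in the n=100K output further below, that summary() reports khat for optimization-based fits):

summary(fit3, pars=c("(Intercept)", "x"), digits=2)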

Logistic regression n=100K, p=100

Create fake data with n=100,000 and p=100

SEED <- 1655
set.seed(SEED)
n <- 1e5
k <- 100
x <- rnorm(n)
xn <- matrix(rnorm(n*(k-1)),nrow=n)
a <- 2
b <- 3
sigma <- 1
y <- as.numeric(a + b*x + sigma*rnorm(n) > 0)
fake <- data.frame(x, xn, y)

Fit using glm

tic()
fit1 <- glm(y ~ ., data=fake, family=binomial())
toc()
12.729 sec elapsed
#display(fit1)

Fit using stan_glm and optimization

Using optimization with a normal approximation at the mode and Pareto smoothed importance resampling provides coefficient standard errors, and also a diagnostic for whether the normal approximation at the mode is appropriate. The full summary below shows the Pareto k diagnostic for every coefficient.

tic()
fit3 <- stan_glm(y ~ ., data=fake, family=binomial(),
                 algorithm='optimizing', init=0) 
toc()
21.868 sec elapsed
summary(fit3, digits=2)

Model Info:
 function:     stan_glm
 family:       binomial [logit]
 formula:      y ~ .
 algorithm:    optimizing
 priors:       see help('prior_summary')
 observations: 100000
 predictors:   101

Estimates:
              Median   MAD_SD   10%   50%   90%
(Intercept)  3.61     0.03     3.58  3.61  3.65
x            5.45     0.05     5.39  5.45  5.50
X1           0.00     0.01    -0.02  0.00  0.01
X2           0.04     0.01     0.03  0.04  0.06
X3           0.00     0.01    -0.02  0.00  0.01
X4           0.00     0.01    -0.02  0.00  0.01
X5           0.02     0.01     0.00  0.02  0.03
X6           0.01     0.01     0.00  0.01  0.03
X7          -0.01     0.01    -0.03 -0.01  0.01
X8           0.01     0.01    -0.01  0.01  0.02
X9           0.00     0.01    -0.01  0.00  0.02
X10          0.00     0.01    -0.02  0.00  0.02
X11          0.01     0.01    -0.01  0.01  0.02
X12          0.00     0.01    -0.02  0.00  0.01
X13         -0.01     0.01    -0.03 -0.01  0.01
X14         -0.01     0.01    -0.02 -0.01  0.01
X15         -0.02     0.01    -0.04 -0.02  0.00
X16          0.01     0.01    -0.01  0.01  0.03
X17          0.02     0.01     0.00  0.02  0.04
X18         -0.01     0.01    -0.02 -0.01  0.01
X19          0.02     0.01     0.00  0.02  0.03
X20         -0.01     0.01    -0.02 -0.01  0.01
X21         -0.03     0.01    -0.05 -0.03 -0.01
X22         -0.03     0.01    -0.05 -0.03 -0.01
X23          0.01     0.01    -0.01  0.01  0.03
X24          0.00     0.01    -0.02  0.00  0.01
X25          0.01     0.01    -0.01  0.01  0.02
X26          0.00     0.01    -0.02  0.00  0.01
X27         -0.01     0.01    -0.02 -0.01  0.01
X28          0.01     0.01    -0.01  0.01  0.02
X29         -0.01     0.01    -0.02 -0.01  0.01
X30         -0.01     0.01    -0.03 -0.01  0.01
X31          0.00     0.01    -0.02  0.00  0.02
X32         -0.02     0.01    -0.03 -0.02  0.00
X33          0.00     0.01    -0.02  0.00  0.01
X34          0.01     0.01    -0.01  0.01  0.03
X35          0.00     0.01    -0.02  0.00  0.02
X36         -0.01     0.02    -0.03 -0.01  0.00
X37          0.01     0.01    -0.01  0.01  0.02
X38          0.00     0.01    -0.02  0.00  0.02
X39          0.01     0.01     0.00  0.01  0.03
X40          0.00     0.01    -0.01  0.00  0.02
X41         -0.03     0.01    -0.05 -0.03 -0.02
X42         -0.02     0.01    -0.03 -0.02  0.00
X43          0.00     0.01    -0.02  0.00  0.01
X44          0.00     0.01    -0.01  0.00  0.02
X45          0.00     0.01    -0.02  0.00  0.02
X46         -0.02     0.01    -0.03 -0.02  0.00
X47          0.00     0.01    -0.02  0.00  0.01
X48         -0.01     0.01    -0.02 -0.01  0.01
X49          0.01     0.01    -0.01  0.01  0.02
X50          0.00     0.01    -0.02  0.00  0.02
X51          0.00     0.01    -0.02  0.00  0.01
X52          0.01     0.01    -0.01  0.01  0.02
X53          0.00     0.01    -0.02  0.00  0.02
X54          0.00     0.01    -0.01  0.00  0.02
X55          0.01     0.01    -0.01  0.01  0.03
X56         -0.01     0.01    -0.03 -0.01  0.00
X57          0.00     0.01    -0.02  0.00  0.01
X58          0.01     0.01    -0.01  0.01  0.02
X59         -0.02     0.01    -0.04 -0.02 -0.01
X60         -0.01     0.01    -0.02 -0.01  0.01
X61         -0.02     0.01    -0.03 -0.02  0.00
X62         -0.01     0.01    -0.03 -0.01  0.00
X63          0.01     0.01    -0.01  0.01  0.02
X64          0.01     0.01    -0.01  0.01  0.03
X65          0.01     0.01    -0.01  0.01  0.03
X66          0.03     0.01     0.01  0.03  0.04
X67         -0.02     0.01    -0.04 -0.02  0.00
X68          0.01     0.01     0.00  0.01  0.03
X69         -0.01     0.01    -0.03 -0.01  0.01
X70          0.00     0.01    -0.02  0.00  0.01
X71          0.02     0.01     0.01  0.02  0.04
X72          0.00     0.01    -0.02  0.00  0.02
X73         -0.01     0.01    -0.03 -0.01  0.01
X74         -0.01     0.01    -0.03 -0.01  0.01
X75         -0.02     0.01    -0.03 -0.02  0.00
X76          0.01     0.01    -0.01  0.01  0.02
X77          0.00     0.01    -0.02  0.00  0.01
X78         -0.01     0.01    -0.03 -0.01  0.00
X79          0.01     0.01    -0.01  0.01  0.02
X80          0.01     0.01    -0.01  0.01  0.02
X81         -0.02     0.01    -0.04 -0.02  0.00
X82         -0.01     0.01    -0.03 -0.01  0.01
X83          0.00     0.01    -0.02  0.00  0.02
X84          0.01     0.01    -0.01  0.01  0.03
X85         -0.01     0.01    -0.03 -0.01  0.00
X86          0.01     0.01    -0.01  0.01  0.03
X87         -0.01     0.01    -0.03 -0.01  0.00
X88          0.02     0.01     0.00  0.02  0.03
X89         -0.01     0.01    -0.03 -0.01  0.01
X90          0.00     0.01    -0.02  0.00  0.01
X91         -0.02     0.01    -0.03 -0.02  0.00
X92         -0.03     0.01    -0.05 -0.03 -0.01
X93          0.02     0.01     0.00  0.02  0.04
X94         -0.01     0.01    -0.03 -0.01  0.01
X95         -0.04     0.01    -0.06 -0.04 -0.03
X96         -0.01     0.01    -0.02 -0.01  0.01
X97          0.01     0.01    -0.01  0.01  0.03
X98         -0.01     0.01    -0.03 -0.01  0.01
X99          0.01     0.01    -0.01  0.01  0.02

Monte Carlo diagnostics
            mcse  khat  n_eff
(Intercept)  0.00 -0.14 506  
x            0.00 -0.13 532  
X1           0.00 -0.14 631  
X2           0.00 -0.14 715  
X3           0.00 -0.14 694  
X4           0.00 -0.14 706  
X5           0.00 -0.14 621  
X6           0.00 -0.14 646  
X7           0.00 -0.14 642  
X8           0.00 -0.14 623  
X9           0.00 -0.14 664  
X10          0.00 -0.14 757  
X11          0.00 -0.14 736  
X12          0.00 -0.14 623  
X13          0.00 -0.14 627  
X14          0.00 -0.14 701  
X15          0.00 -0.14 709  
X16          0.00 -0.14 693  
X17          0.00 -0.14 663  
X18          0.00 -0.14 699  
X19          0.00 -0.14 707  
X20          0.00 -0.14 738  
X21          0.00 -0.14 694  
X22          0.00 -0.14 676  
X23          0.00 -0.14 637  
X24          0.00 -0.14 733  
X25          0.00 -0.14 701  
X26          0.00 -0.14 626  
X27          0.00 -0.14 671  
X28          0.00 -0.14 685  
X29          0.00 -0.14 707  
X30          0.00 -0.14 710  
X31          0.00 -0.14 745  
X32          0.00 -0.14 613  
X33          0.00 -0.14 651  
X34          0.00 -0.14 696  
X35          0.00 -0.14 739  
X36          0.00 -0.14 635  
X37          0.00 -0.14 692  
X38          0.00 -0.14 682  
X39          0.00 -0.14 679  
X40          0.00 -0.14 627  
X41          0.00 -0.14 732  
X42          0.00 -0.14 647  
X43          0.00 -0.14 698  
X44          0.00 -0.14 698  
X45          0.00 -0.14 689  
X46          0.00 -0.14 690  
X47          0.00 -0.14 710  
X48          0.00 -0.14 661  
X49          0.00 -0.14 685  
X50          0.00 -0.14 639  
X51          0.00 -0.14 681  
X52          0.00 -0.14 668  
X53          0.00 -0.14 655  
X54          0.00 -0.14 709  
X55          0.00 -0.14 686  
X56          0.00 -0.14 633  
X57          0.00 -0.14 639  
X58          0.00 -0.14 644  
X59          0.00 -0.14 675  
X60          0.00 -0.14 658  
X61          0.00 -0.14 686  
X62          0.00 -0.14 637  
X63          0.00 -0.14 620  
X64          0.00 -0.14 696  
X65          0.00 -0.14 724  
X66          0.00 -0.14 617  
X67          0.00 -0.14 667  
X68          0.00 -0.14 649  
X69          0.00 -0.14 687  
X70          0.00 -0.14 709  
X71          0.00 -0.14 627  
X72          0.00 -0.14 772  
X73          0.00 -0.14 638  
X74          0.00 -0.14 701  
X75          0.00 -0.14 684  
X76          0.00 -0.14 677  
X77          0.00 -0.14 654  
X78          0.00 -0.14 668  
X79          0.00 -0.14 689  
X80          0.00 -0.14 673  
X81          0.00 -0.13 646  
X82          0.00 -0.14 713  
X83          0.00 -0.14 656  
X84          0.00 -0.14 668  
X85          0.00 -0.14 684  
X86          0.00 -0.14 637  
X87          0.00 -0.14 703  
X88          0.00 -0.14 653  
X89          0.00 -0.14 683  
X90          0.00 -0.14 639  
X91          0.00 -0.14 652  
X92          0.00 -0.14 637  
X93          0.00 -0.14 654  
X94          0.00 -0.14 697  
X95          0.00 -0.14 635  
X96          0.00 -0.14 675  
X97          0.00 -0.14 725  
X98          0.00 -0.14 663  
X99          0.00 -0.14 640  

For each parameter, mcse is the Monte Carlo standard error, n_eff is a crude measure of effective sample size, and khat is the Pareto k diagnostic for importance sampling (performance is usually good when khat < 0.7).
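
To make the scaling with n explicit, a small experiment can loop over increasing data sizes and record elapsed times. This is a sketch rather than part of the original demonstration; the helper name time_glm_fit is illustrative:

# Measure how glm fitting time grows with n (p fixed at 100)
time_glm_fit <- function(n, k=100, seed=1655) {
  set.seed(seed)
  x <- rnorm(n)
  xn <- matrix(rnorm(n*(k-1)), nrow=n)
  y <- as.numeric(2 + 3*x + rnorm(n) > 0)
  fake <- data.frame(x, xn, y)
  system.time(glm(y ~ ., data=fake, family=binomial()))[["elapsed"]]
}
ns <- c(1e3, 1e4, 1e5)
data.frame(n=ns, elapsed=sapply(ns, time_glm_fit))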