Demonstrate Poisson regression. See Chapter 15 in Regression and Other Stories.


Load packages

library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("MASS")

Set random seed for reproducability

SEED <- 3579

Simulate fake data

n <- 100
x <- runif(n, -2, 2)
a <- 1
b <- 2
linpred <- a + b*x
y <- rpois(n, exp(linpred))
fake <- data.frame(x=x, y=y)
head(fake)
           x y
1  0.3342460 3
2 -0.2645611 0
3 -0.3269496 1
4  0.5807525 9
5  0.1624624 4
6 -1.3374593 0

Poisson regression

Fit Poisson regression model

fit_fake <- stan_glm(y ~ x, family=poisson(link="log"), data=fake, refresh=0)
print(fit_fake)
stan_glm
 family:       poisson [log]
 formula:      y ~ x
 observations: 100
 predictors:   2
------
            Median MAD_SD
(Intercept) 1.0    0.1   
x           2.0    0.1   

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
par(mar=c(3,3,1,1), mgp=c(1.7,.5,0), tck=-.01)
plot(x, y, ylim=c(-1, 200), yaxs="i", bty="l", main="Simulated data from Poisson regression", cex.main=0.9, type="n")
## curve(exp(a + b*x), from=-2.5, to=2.5, add=TRUE)
## Don't bother plotting true curve because it is so close to the fitted curve
curve(exp(coef(fit_fake)[1] + coef(fit_fake)[2]*x), from=-2.5, to=2.5, add=TRUE)
points(x, y, pch=20, cex=.6)

Overdispersion

phi_grid <- c(0.1, 1, 10)
K <- length(phi_grid)
y_nb <- as.list(rep(NA, K))
fake_nb <- as.list(rep(NA, K))
fit_nb <- as.list(rep(NA, K))
for (k in 1:K){
  y_nb[[k]] <- rnegbin(n, exp(linpred), phi_grid[k])
  fake_nb[[k]] <- data.frame(x=x, y=y_nb[[k]])
  fit_nb[[k]] <- stan_glm(y ~ x, family=neg_binomial_2(link="log"), data=fake_nb[[k]], refresh=0)
  print(fit_nb[[k]])
}
stan_glm
 family:       neg_binomial_2 [log]
 formula:      y ~ x
 observations: 100
 predictors:   2
------
            Median MAD_SD
(Intercept) 1.2    0.3   
x           1.9    0.3   

Auxiliary parameter(s):
                      Median MAD_SD
reciprocal_dispersion 0.1    0.0   

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
stan_glm
 family:       neg_binomial_2 [log]
 formula:      y ~ x
 observations: 100
 predictors:   2
------
            Median MAD_SD
(Intercept) 1.0    0.2   
x           2.0    0.1   

Auxiliary parameter(s):
                      Median MAD_SD
reciprocal_dispersion 1.0    0.2   

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
stan_glm
 family:       neg_binomial_2 [log]
 formula:      y ~ x
 observations: 100
 predictors:   2
------
            Median MAD_SD
(Intercept) 0.9    0.1   
x           2.0    0.1   

Auxiliary parameter(s):
                      Median MAD_SD
reciprocal_dispersion 5.8    1.4   

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
par(mar=c(3,3,1,1), mgp=c(1.7,.5,0), tck=-.01)
par(mfrow=c(1,3), oma=c(0,0,2,0))
for (k in 1:K) {
  phi <- phi_grid[k]
  if (phi==0.1)
    plot(x, y_nb[[k]], ylim=c(-1, 200), yaxs="i", bty="l", ylab="y", main=expression(paste(phi, " = ", 0.1)), type="n")
  else if (phi==1)
    plot(x, y_nb[[k]], ylim=c(-1, 200), yaxs="i", bty="l", ylab="y", main=expression(paste(phi, " = ", 1)), type="n")
  else if (phi==10)
    plot(x, y_nb[[k]], ylim=c(-1, 200), yaxs="i", bty="l", ylab="y", main=expression(paste(phi, " = ", 10)), type="n")
  ## curve(exp(a + b*x), from=-2.5, to=2.5, add=TRUE)
  ## Don't bother plotting true curve because it is so close to the fitted curve
  curve(exp(coef(fit_nb[[k]])[1] + coef(fit_nb[[k]])[2]*x), from=-2.5, to=2.5, add=TRUE)
  points(x, y_nb[[k]], pch=20, cex=.7)
}
mtext("Simulated data from overdispersed Poisson (negative binomial) regression", outer=TRUE, side=3, line=1, cex=0.8)

LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogUG9pc3NvbiBFeGFtcGxlIgphdXRob3I6ICJBbmRyZXcgR2VsbWFuLCBKZW5uaWZlciBIaWxsLCBBa2kgVmVodGFyaSIKZGF0ZTogImByIGZvcm1hdChTeXMuRGF0ZSgpKWAiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMgogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQpEZW1vbnN0cmF0ZSBQb2lzc29uIHJlZ3Jlc3Npb24uIFNlZSBDaGFwdGVyIDE1IGluClJlZ3Jlc3Npb24gYW5kIE90aGVyIFN0b3JpZXMuCgotLS0tLS0tLS0tLS0tCgoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRSwgY29tbWVudD1OQSkKIyBzd2l0Y2ggdGhpcyB0byBUUlVFIHRvIHNhdmUgZmlndXJlcyBpbiBzZXBhcmF0ZSBmaWxlcwpzYXZlZmlncyA8LSBGQUxTRQpgYGAKCiMjIyMgTG9hZCBwYWNrYWdlcwoKYGBge3IgfQpsaWJyYXJ5KCJycHJvanJvb3QiKQpyb290PC1oYXNfZmlsZSgiLlJPUy1FeGFtcGxlcy1yb290IikkbWFrZV9maXhfZmlsZSgpCmxpYnJhcnkoInJzdGFuYXJtIikKbGlicmFyeSgiTUFTUyIpCmBgYAoKU2V0IHJhbmRvbSBzZWVkIGZvciByZXByb2R1Y2FiaWxpdHkKCmBgYHtyIH0KU0VFRCA8LSAzNTc5CmBgYAoKIyMgU2ltdWxhdGUgZmFrZSBkYXRhCgpgYGB7ciB9Cm4gPC0gMTAwCnggPC0gcnVuaWYobiwgLTIsIDIpCmEgPC0gMQpiIDwtIDIKbGlucHJlZCA8LSBhICsgYip4CnkgPC0gcnBvaXMobiwgZXhwKGxpbnByZWQpKQpmYWtlIDwtIGRhdGEuZnJhbWUoeD14LCB5PXkpCmhlYWQoZmFrZSkKYGBgCgojIyBQb2lzc29uIHJlZ3Jlc3Npb24KCiMjIyMgRml0IFBvaXNzb24gcmVncmVzc2lvbiBtb2RlbAoKYGBge3IgfQpmaXRfZmFrZSA8LSBzdGFuX2dsbSh5IH4geCwgZmFtaWx5PXBvaXNzb24obGluaz0ibG9nIiksIGRhdGE9ZmFrZSwgcmVmcmVzaD0wKQpwcmludChmaXRfZmFrZSkKCmBgYApgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBkZigicG9pc3NvbjEucGRmIiwgaGVpZ2h0PTQsIHdpZHRoPTUpCmBgYAoKCgpgYGB7ciB9CnBhcihtYXI9YygzLDMsMSwxKSwgbWdwPWMoMS43LC41LDApLCB0Y2s9LS4wMSkKcGxvdCh4LCB5LCB5bGltPWMoLTEsIDIwMCksIHlheHM9ImkiLCBidHk9ImwiLCBtYWluPSJTaW11bGF0ZWQgZGF0YSBmcm9tIFBvaXNzb24gcmVncmVzc2lvbiIsIGNleC5tYWluPTAuOSwgdHlwZT0ibiIpCiMjIGN1cnZlKGV4cChhICsgYip4KSwgZnJvbT0tMi41LCB0bz0yLjUsIGFkZD1UUlVFKQojIyBEb24ndCBib3RoZXIgcGxvdHRpbmcgdHJ1ZSBjdXJ2ZSBiZWNhdXNlIGl0IGlzIHNvIGNsb3NlIHRvIHRoZSBmaXR0ZWQgY3VydmUKY3VydmUoZXhwKGNvZWYoZml0X2Zha2UpWzFdICsgY29lZihmaXRfZmFrZSlbMl0qeCksIGZyb209LTIuNSwgdG89Mi41LCBhZGQ9VFJVRSkKcG9pbnRzKHgsIHksIHBjaD0yMCwgY2V4PS42KQpgYGAKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KaWYgKHNhdmVmaWdzKSBkZXYub2ZmKCkKYGBgCgojIyBPdmVyZGlzcGVyc2lvbgoKCmBgYHtyIH0KcGhpX2dyaWQgPC0gYygwLjEsIDEsIDEwKQpLIDwtIGxlbmd0aChwaGlfZ3JpZCkKeV9uYiA8LSBhcy5saXN0KHJlcChOQSwgSykpCmZha2VfbmIgPC0gYXMubGlzdChyZXAoTkEsIEspKQpmaXRfbmIgPC0gYXMubGlzdChyZXAoTkEsIEspKQpmb3IgKGsgaW4gMTpLKXsKICB5X25iW1trXV0gPC0gcm5lZ2JpbihuLCBleHAobGlucHJlZCksIHBoaV9ncmlkW2tdKQogIGZha2VfbmJbW2tdXSA8LSBkYXRhLmZyYW1lKHg9eCwgeT15X25iW1trXV0pCiAgZml0X25iW1trXV0gPC0gc3Rhbl9nbG0oeSB+IHgsIGZhbWlseT1uZWdfYmlub21pYWxfMihsaW5rPSJsb2ciKSwgZGF0YT1mYWtlX25iW1trXV0sIHJlZnJlc2g9MCkKICBwcmludChmaXRfbmJbW2tdXSkKfQoKYGBgCmBgYHtyIGV2YWw9RkFMU0UsIGluY2x1ZGU9RkFMU0V9CmlmIChzYXZlZmlncykgcGRmKCJwb2lzc29uMi5wZGYiLCBoZWlnaHQ9Mywgd2lkdGg9OSkKYGBgCgoKCmBgYHtyIH0KcGFyKG1hcj1jKDMsMywxLDEpLCBtZ3A9YygxLjcsLjUsMCksIHRjaz0tLjAxKQpwYXIobWZyb3c9YygxLDMpLCBvbWE9YygwLDAsMiwwKSkKZm9yIChrIGluIDE6SykgewogIHBoaSA8LSBwaGlfZ3JpZFtrXQogIGlmIChwaGk9PTAuMSkKICAgIHBsb3QoeCwgeV9uYltba11dLCB5bGltPWMoLTEsIDIwMCksIHlheHM9ImkiLCBidHk9ImwiLCB5bGFiPSJ5IiwgbWFpbj1leHByZXNzaW9uKHBhc3RlKHBoaSwgIiA9ICIsIDAuMSkpLCB0eXBlPSJuIikKICBlbHNlIGlmIChwaGk9PTEpCiAgICBwbG90KHgsIHlfbmJbW2tdXSwgeWxpbT1jKC0xLCAyMDApLCB5YXhzPSJpIiwgYnR5PSJsIiwgeWxhYj0ieSIsIG1haW49ZXhwcmVzc2lvbihwYXN0ZShwaGksICIgPSAiLCAxKSksIHR5cGU9Im4iKQogIGVsc2UgaWYgKHBoaT09MTApCiAgICBwbG90KHgsIHlfbmJbW2tdXSwgeWxpbT1jKC0xLCAyMDApLCB5YXhzPSJpIiwgYnR5PSJsIiwgeWxhYj0ieSIsIG1haW49ZXhwcmVzc2lvbihwYXN0ZShwaGksICIgPSAiLCAxMCkpLCB0eXBlPSJuIikKICAjIyBjdXJ2ZShleHAoYSArIGIqeCksIGZyb209LTIuNSwgdG89Mi41LCBhZGQ9VFJVRSkKICAjIyBEb24ndCBib3RoZXIgcGxvdHRpbmcgdHJ1ZSBjdXJ2ZSBiZWNhdXNlIGl0IGlzIHNvIGNsb3NlIHRvIHRoZSBmaXR0ZWQgY3VydmUKICBjdXJ2ZShleHAoY29lZihmaXRfbmJbW2tdXSlbMV0gKyBjb2VmKGZpdF9uYltba11dKVsyXSp4KSwgZnJvbT0tMi41LCB0bz0yLjUsIGFkZD1UUlVFKQogIHBvaW50cyh4LCB5X25iW1trXV0sIHBjaD0yMCwgY2V4PS43KQp9Cm10ZXh0KCJTaW11bGF0ZWQgZGF0YSBmcm9tIG92ZXJkaXNwZXJzZWQgUG9pc3NvbiAobmVnYXRpdmUgYmlub21pYWwpIHJlZ3Jlc3Npb24iLCBvdXRlcj1UUlVFLCBzaWRlPTMsIGxpbmU9MSwgY2V4PTAuOCkKYGBgCmBgYHtyIGV2YWw9RkFMU0UsIGluY2x1ZGU9RkFMU0V9CmlmIChzYXZlZmlncykgZGV2Lm9mZigpCmBgYAoK