Comparison of robit and logit models for binary data. See Chapter 15 in Regression and Other Stories.


Load packages

library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("cmdstanr")
options(mc.cores = 1)
library("ggdist")
logit <- qlogis
invlogit <- plogis

Generate data from logit model

# set the random seed to get reproducible results
# change the seed to experiment with variation due to random noise
set.seed(1234)
N <- 50
x <- runif(N, -9, 9)
a <- 0
b <- 0.8
p <- invlogit(a + b*x)
y <- rbinom(N, 1, p)
df <- 4
data_1 <- list(N=N, x=x, y=y, df=df)

Fit logit and probit models using the simulated data

Show Stan code for the models

writeLines(readLines(root("Robit","logit.stan")))
data {
  int N;
  vector[N] x;
  int y[N];
}
parameters {
  real a;
  real b;
}
model {
  y ~ bernoulli_logit(a + b*x);
}
writeLines(readLines(root("Robit","robit.stan")))
data {
  int N;
  vector[N] x;
  int y[N];
  real df;
}
parameters {
  real a;
  real b;
}
model {
  vector[N] p;
  for (n in 1:N) p[n] = student_t_cdf(a + b*x[n], df, 0, sqrt((df - 2)/df));
  y ~ bernoulli(p);
}

Compile models

logit_model <- cmdstan_model("logit.stan")
robit_model <- cmdstan_model("robit.stan")

Sample and compute posterior medians

fit_logit_1 <- logit_model$sample(data=data_1, refresh=0)
Running MCMC with 4 sequential chains...

Chain 1 finished in 0.0 seconds.
Chain 2 finished in 0.0 seconds.
Chain 3 finished in 0.0 seconds.
Chain 4 finished in 0.0 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.0 seconds.
Total execution time: 0.6 seconds.
print(fit_logit_1)
 variable  mean median   sd  mad     q5   q95 rhat ess_bulk ess_tail
     lp__ -9.85  -9.49 1.13 0.80 -12.13 -8.76 1.00     1509     1559
     a    -0.63  -0.60 0.74 0.72  -1.91  0.53 1.00     1725     1888
     b     1.44   1.36 0.52 0.47   0.77  2.39 1.00     1737     1364
a_hat_logit_1 <- median(fit_logit_1$draws("a"))
b_hat_logit_1 <- median(fit_logit_1$draws("b"))

Sample and compute posterior medians

fit_robit_1 <- robit_model$sample(data=data_1, refresh=0)
Running MCMC with 4 sequential chains...

Chain 1 finished in 0.3 seconds.
Chain 2 finished in 0.3 seconds.
Chain 3 finished in 0.3 seconds.
Chain 4 finished in 0.3 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.3 seconds.
Total execution time: 1.4 seconds.
print(fit_robit_1)
 variable   mean median   sd  mad     q5   q95 rhat ess_bulk ess_tail
     lp__ -10.04  -9.65 1.23 0.89 -12.61 -8.86 1.00     1065     1047
     a     -0.34  -0.31 0.40 0.38  -1.04  0.27 1.00     1146     1300
     b      0.86   0.76 0.42 0.30   0.40  1.66 1.00     1072     1148
a_hat_robit_1 <- median(fit_robit_1$draws("a"))
b_hat_robit_1 <- median(fit_robit_1$draws("b"))

Plot

if (savefigs) pdf("logistic2b.pdf", height=4, width=6)
par(mar=c(3,3,2,1), mgp=c(1.5,.5,0), tck=-.01)
plot(data_1$x, data_1$y, yaxt="n", main="Data from a logistic regression", xlab="x", ylab="y")
axis(2, c(0,1))
curve(invlogit(a_hat_logit_1 + b_hat_logit_1*x), add=TRUE, lty=2)
curve(pstudent_t(a_hat_robit_1 + b_hat_robit_1*x, data_1$df, 0, sqrt((data_1$df-2)/data_1$df)), add=TRUE, lty=1)
legend (1, .3, c("fitted logistic regression", "fitted robit regression"), lty=c(2,1), cex=.8)

Add an outlier by flipping the class of one observation

low_value <- (1:N)[x==sort(x)[4]]
data_2 <- data_1
data_2$y[low_value] <- 1

Sample and compute posterior medians

fit_logit_2 <- logit_model$sample(data=data_2, refresh=0)
Running MCMC with 4 sequential chains...

Chain 1 finished in 0.0 seconds.
Chain 2 finished in 0.0 seconds.
Chain 3 finished in 0.1 seconds.
Chain 4 finished in 0.0 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.0 seconds.
Total execution time: 0.5 seconds.
print(fit_logit_2)
 variable   mean median   sd  mad     q5    q95 rhat ess_bulk ess_tail
     lp__ -15.60 -15.25 1.08 0.75 -17.78 -14.59 1.00     1819     2284
     a      0.04   0.04 0.51 0.50  -0.77   0.89 1.00     2705     2184
     b      0.74   0.72 0.20 0.18   0.46   1.10 1.00     2828     2353
a_hat_logit_2 <- median(fit_logit_2$draws("a"))
b_hat_logit_2 <- median(fit_logit_2$draws("b"))

Sample and compute posterior medians

fit_robit_2 <- robit_model$sample(data=data_2, refresh=0)
Running MCMC with 4 sequential chains...

Chain 1 finished in 0.3 seconds.
Chain 2 finished in 0.3 seconds.
Chain 3 finished in 0.3 seconds.
Chain 4 finished in 0.3 seconds.

All 4 chains finished successfully.
Mean chain execution time: 0.3 seconds.
Total execution time: 1.5 seconds.
print(fit_robit_2)
 variable   mean median   sd  mad     q5    q95 rhat ess_bulk ess_tail
     lp__ -15.21 -14.89 1.09 0.79 -17.49 -14.16 1.00     1393     1616
     a     -0.08  -0.07 0.29 0.28  -0.54   0.40 1.00     1580     1860
     b      0.47   0.44 0.17 0.14   0.26   0.78 1.00     2021     1484
a_hat_robit_2 <- median(fit_robit_2$draws("a"))
b_hat_robit_2 <- median(fit_robit_2$draws("b"))

Plot

if (savefigs) pdf("logistic2a.pdf", height=4, width=6)
par(mar=c(3,3,2,1), mgp=c(1.5,.5,0), tck=-.01)
plot(data_2$x, data_2$y, yaxt="n", main="Contaminated data", xlab="x", ylab="y")
axis(2, c(0,1))
curve(invlogit(a_hat_logit_2 + b_hat_logit_2*x), add=TRUE, lty=2)
curve(pstudent_t(a_hat_robit_2 + b_hat_robit_2*x, data_2$df, 0, sqrt((data_2$df-2)/data_2$df)), add=TRUE, lty=1)
legend (1, .3, c("fitted logistic regression", "fitted robit regression"), lty=c(2,1), cex=.8)

IycgLS0tCiMnIHRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogUm9iaXQiCiMnIGF1dGhvcjogIkFuZHJldyBHZWxtYW4sIEplbm5pZmVyIEhpbGwsIEFraSBWZWh0YXJpIgojJyBkYXRlOiAiYHIgZm9ybWF0KFN5cy5EYXRlKCkpYCIKIycgb3V0cHV0OgojJyAgIGh0bWxfZG9jdW1lbnQ6CiMnICAgICB0aGVtZTogcmVhZGFibGUKIycgICAgIHRvYzogdHJ1ZQojJyAgICAgdG9jX2RlcHRoOiAyCiMnICAgICB0b2NfZmxvYXQ6IHRydWUKIycgICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKIycgLS0tCgojJyBDb21wYXJpc29uIG9mIHJvYml0IGFuZCBsb2dpdCBtb2RlbHMgZm9yIGJpbmFyeSBkYXRhLiBTZWUgQ2hhcHRlcgojJyAxNSBpbiBSZWdyZXNzaW9uIGFuZCBPdGhlciBTdG9yaWVzLgojJyAKIycgLS0tLS0tLS0tLS0tLQojJyAKCiMrIHNldHVwLCBpbmNsdWRlPUZBTFNFCmtuaXRyOjpvcHRzX2NodW5rJHNldChtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRSwgY29tbWVudD1OQSkKIyBzd2l0Y2ggdGhpcyB0byBUUlVFIHRvIHNhdmUgZmlndXJlcyBpbiBzZXBhcmF0ZSBmaWxlcwpzYXZlZmlncyA8LSBGQUxTRQoKIycgIyMjIyBMb2FkIHBhY2thZ2VzCmxpYnJhcnkoInJwcm9qcm9vdCIpCnJvb3Q8LWhhc19maWxlKCIuUk9TLUV4YW1wbGVzLXJvb3QiKSRtYWtlX2ZpeF9maWxlKCkKbGlicmFyeSgiY21kc3RhbnIiKQpvcHRpb25zKG1jLmNvcmVzID0gMSkKbGlicmFyeSgiZ2dkaXN0IikKbG9naXQgPC0gcWxvZ2lzCmludmxvZ2l0IDwtIHBsb2dpcwoKIycgIyMgR2VuZXJhdGUgZGF0YSBmcm9tIGxvZ2l0IG1vZGVsCgojIHNldCB0aGUgcmFuZG9tIHNlZWQgdG8gZ2V0IHJlcHJvZHVjaWJsZSByZXN1bHRzCiMgY2hhbmdlIHRoZSBzZWVkIHRvIGV4cGVyaW1lbnQgd2l0aCB2YXJpYXRpb24gZHVlIHRvIHJhbmRvbSBub2lzZQpzZXQuc2VlZCgxMjM0KQpOIDwtIDUwCnggPC0gcnVuaWYoTiwgLTksIDkpCmEgPC0gMApiIDwtIDAuOApwIDwtIGludmxvZ2l0KGEgKyBiKngpCnkgPC0gcmJpbm9tKE4sIDEsIHApCmRmIDwtIDQKZGF0YV8xIDwtIGxpc3QoTj1OLCB4PXgsIHk9eSwgZGY9ZGYpCgojJyAjIyBGaXQgbG9naXQgYW5kIHByb2JpdCBtb2RlbHMgdXNpbmcgdGhlIHNpbXVsYXRlZCBkYXRhCiMnCiMnICMjIyMgU2hvdyBTdGFuIGNvZGUgZm9yIHRoZSBtb2RlbHMKd3JpdGVMaW5lcyhyZWFkTGluZXMocm9vdCgiUm9iaXQiLCJsb2dpdC5zdGFuIikpKQp3cml0ZUxpbmVzKHJlYWRMaW5lcyhyb290KCJSb2JpdCIsInJvYml0LnN0YW4iKSkpCgojJyAjIyMjIENvbXBpbGUgbW9kZWxzCmxvZ2l0X21vZGVsIDwtIGNtZHN0YW5fbW9kZWwoImxvZ2l0LnN0YW4iKQpyb2JpdF9tb2RlbCA8LSBjbWRzdGFuX21vZGVsKCJyb2JpdC5zdGFuIikKCiMnICMjIyMgU2FtcGxlIGFuZCBjb21wdXRlIHBvc3RlcmlvciBtZWRpYW5zCmZpdF9sb2dpdF8xIDwtIGxvZ2l0X21vZGVsJHNhbXBsZShkYXRhPWRhdGFfMSwgcmVmcmVzaD0wKQpwcmludChmaXRfbG9naXRfMSkKYV9oYXRfbG9naXRfMSA8LSBtZWRpYW4oZml0X2xvZ2l0XzEkZHJhd3MoImEiKSkKYl9oYXRfbG9naXRfMSA8LSBtZWRpYW4oZml0X2xvZ2l0XzEkZHJhd3MoImIiKSkKCiMnICMjIyMgU2FtcGxlIGFuZCBjb21wdXRlIHBvc3RlcmlvciBtZWRpYW5zCmZpdF9yb2JpdF8xIDwtIHJvYml0X21vZGVsJHNhbXBsZShkYXRhPWRhdGFfMSwgcmVmcmVzaD0wKQpwcmludChmaXRfcm9iaXRfMSkKYV9oYXRfcm9iaXRfMSA8LSBtZWRpYW4oZml0X3JvYml0XzEkZHJhd3MoImEiKSkKYl9oYXRfcm9iaXRfMSA8LSBtZWRpYW4oZml0X3JvYml0XzEkZHJhd3MoImIiKSkKCiMnICMjIyMgUGxvdAppZiAoc2F2ZWZpZ3MpIHBkZigibG9naXN0aWMyYi5wZGYiLCBoZWlnaHQ9NCwgd2lkdGg9NikKIysKcGFyKG1hcj1jKDMsMywyLDEpLCBtZ3A9YygxLjUsLjUsMCksIHRjaz0tLjAxKQpwbG90KGRhdGFfMSR4LCBkYXRhXzEkeSwgeWF4dD0ibiIsIG1haW49IkRhdGEgZnJvbSBhIGxvZ2lzdGljIHJlZ3Jlc3Npb24iLCB4bGFiPSJ4IiwgeWxhYj0ieSIpCmF4aXMoMiwgYygwLDEpKQpjdXJ2ZShpbnZsb2dpdChhX2hhdF9sb2dpdF8xICsgYl9oYXRfbG9naXRfMSp4KSwgYWRkPVRSVUUsIGx0eT0yKQpjdXJ2ZShwc3R1ZGVudF90KGFfaGF0X3JvYml0XzEgKyBiX2hhdF9yb2JpdF8xKngsIGRhdGFfMSRkZiwgMCwgc3FydCgoZGF0YV8xJGRmLTIpL2RhdGFfMSRkZikpLCBhZGQ9VFJVRSwgbHR5PTEpCmxlZ2VuZCAoMSwgLjMsIGMoImZpdHRlZCBsb2dpc3RpYyByZWdyZXNzaW9uIiwgImZpdHRlZCByb2JpdCByZWdyZXNzaW9uIiksIGx0eT1jKDIsMSksIGNleD0uOCkKIysgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRQppZiAoc2F2ZWZpZ3MpIGRldi5vZmYoKQoKIycgIyMgQWRkIGFuIG91dGxpZXIgYnkgZmxpcHBpbmcgdGhlIGNsYXNzIG9mIG9uZSBvYnNlcnZhdGlvbgpsb3dfdmFsdWUgPC0gKDE6TilbeD09c29ydCh4KVs0XV0KZGF0YV8yIDwtIGRhdGFfMQpkYXRhXzIkeVtsb3dfdmFsdWVdIDwtIDEKCiMnICMjIyMgU2FtcGxlIGFuZCBjb21wdXRlIHBvc3RlcmlvciBtZWRpYW5zCmZpdF9sb2dpdF8yIDwtIGxvZ2l0X21vZGVsJHNhbXBsZShkYXRhPWRhdGFfMiwgcmVmcmVzaD0wKQpwcmludChmaXRfbG9naXRfMikKYV9oYXRfbG9naXRfMiA8LSBtZWRpYW4oZml0X2xvZ2l0XzIkZHJhd3MoImEiKSkKYl9oYXRfbG9naXRfMiA8LSBtZWRpYW4oZml0X2xvZ2l0XzIkZHJhd3MoImIiKSkKCiMnICMjIyMgU2FtcGxlIGFuZCBjb21wdXRlIHBvc3RlcmlvciBtZWRpYW5zCmZpdF9yb2JpdF8yIDwtIHJvYml0X21vZGVsJHNhbXBsZShkYXRhPWRhdGFfMiwgcmVmcmVzaD0wKQpwcmludChmaXRfcm9iaXRfMikKYV9oYXRfcm9iaXRfMiA8LSBtZWRpYW4oZml0X3JvYml0XzIkZHJhd3MoImEiKSkKYl9oYXRfcm9iaXRfMiA8LSBtZWRpYW4oZml0X3JvYml0XzIkZHJhd3MoImIiKSkKCiMnIFBsb3QKaWYgKHNhdmVmaWdzKSBwZGYoImxvZ2lzdGljMmEucGRmIiwgaGVpZ2h0PTQsIHdpZHRoPTYpCiMrCnBhcihtYXI9YygzLDMsMiwxKSwgbWdwPWMoMS41LC41LDApLCB0Y2s9LS4wMSkKcGxvdChkYXRhXzIkeCwgZGF0YV8yJHksIHlheHQ9Im4iLCBtYWluPSJDb250YW1pbmF0ZWQgZGF0YSIsIHhsYWI9IngiLCB5bGFiPSJ5IikKYXhpcygyLCBjKDAsMSkpCmN1cnZlKGludmxvZ2l0KGFfaGF0X2xvZ2l0XzIgKyBiX2hhdF9sb2dpdF8yKngpLCBhZGQ9VFJVRSwgbHR5PTIpCmN1cnZlKHBzdHVkZW50X3QoYV9oYXRfcm9iaXRfMiArIGJfaGF0X3JvYml0XzIqeCwgZGF0YV8yJGRmLCAwLCBzcXJ0KChkYXRhXzIkZGYtMikvZGF0YV8yJGRmKSksIGFkZD1UUlVFLCBsdHk9MSkKbGVnZW5kICgxLCAuMywgYygiZml0dGVkIGxvZ2lzdGljIHJlZ3Jlc3Npb24iLCAiZml0dGVkIHJvYml0IHJlZ3Jlc3Npb24iKSwgbHR5PWMoMiwxKSwgY2V4PS44KQojKyBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFCmlmIChzYXZlZmlncykgZGV2Lm9mZigpCg==