Fake dataset of a randomized experiment on student grades. See Chapter 16 in Regression and Other Stories.


Load packages

library("rstanarm")

First simulation

n <- 100
y_if_control <- rnorm(n, 60, 20)
y_if_treated <- y_if_control + 5
z <- sample(rep(c(0,1), n/2))
y <- ifelse(z==1, y_if_treated, y_if_control)
fake <- data.frame(y, z)
diff <- mean(y[z==1]) - mean(y[z==0])
se_diff <- sqrt(sd(y[z==0])^2/sum(z==0) + sd(y[z==1])^2/sum(z==1))
print(c(diff, se_diff), digits=2)
[1] 1.6 3.9
fit_1a <- stan_glm(y ~ z, data=fake, refresh=0)
print(fit_1a)
stan_glm
 family:       gaussian [identity]
 formula:      y ~ z
 observations: 100
 predictors:   2
------
            Median MAD_SD
(Intercept) 63.5    2.7  
z            1.5    3.9  

Auxiliary parameter(s):
      Median MAD_SD
sigma 19.3    1.4  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fake$x <- rnorm(n, 50, 20)
fit_1b <- stan_glm(y ~ z + x, data=fake, refresh=0)
print(fit_1b)
stan_glm
 family:       gaussian [identity]
 formula:      y ~ z + x
 observations: 100
 predictors:   3
------
            Median MAD_SD
(Intercept) 63.8    5.5  
z            1.6    3.9  
x            0.0    0.1  

Auxiliary parameter(s):
      Median MAD_SD
sigma 19.5    1.4  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Simulation with realistic pre-test

n <- 100
true_ability <- rnorm(n, 50, 16)
x <- true_ability + rnorm(n, 0, 12)
y_if_control <- true_ability + rnorm(n, 0, 12) + 10
y_if_treated <- y_if_control + 5
z <- sample(rep(c(0,1), n/2))
y <- ifelse(z==1, y_if_treated, y_if_control)
fake_2 <- data.frame(x, y, z)
fit_2a <- stan_glm(y ~ z, data=fake_2, refresh=0)
fit_2b <- stan_glm(y ~ z + x, data=fake_2, refresh=0)
print(fit_2a)
stan_glm
 family:       gaussian [identity]
 formula:      y ~ z
 observations: 100
 predictors:   2
------
            Median MAD_SD
(Intercept) 61.6    3.0  
z            1.0    4.3  

Auxiliary parameter(s):
      Median MAD_SD
sigma 20.5    1.5  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
print(fit_2b)
stan_glm
 family:       gaussian [identity]
 formula:      y ~ z + x
 observations: 100
 predictors:   3
------
            Median MAD_SD
(Intercept) 29.1    5.0  
z            2.3    3.2  
x            0.6    0.1  

Auxiliary parameter(s):
      Median MAD_SD
sigma 16.1    1.1  

------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg

Simulating selection bias in treatment assignment

invlogit <- plogis
z <- rbinom(n, 1, invlogit(-(x-50)/20))

pdf("design_bias.pdf", height=4, width=6)
par(mar=c(3, 3, 2, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, ifelse(z==1, .99, .01), ylim=c(0,1), xlab="Pre-test score", ylab="Pr (z=1)", xaxt="n", yaxt="n", yaxs="i", bty="l", pch=20)
axis(1, seq(0,100,20))
axis(2, seq(0,1,.5))
text(45, 0.93, "(assigned to treatment group, z=1)", col="gray40")
text(55, 0.07, "(assigned to control group, z=0)", col="gray40")
curve(invlogit(-(x-50)/20), add=TRUE)
dev.off()
png 
  2 
y <- ifelse(z==1, y_if_treated, y_if_control)
fake_3 <- data.frame(x, y, z)

Experiment <- function(n) {
  true_ability <- rnorm(n, 50, 16)
  x <- true_ability + rnorm(n, 0, 12)
  y_if_control <- true_ability + rnorm(n, 0, 12) + 10
  y_if_treated <- y_if_control + 5
  z <- rbinom(n, 1, invlogit(-(x-50)/20))
  y <- ifelse(z==1, y_if_treated, y_if_control)
  fake_3 <- data.frame(x, y, z)
  fit_3a <- stan_glm(y ~ z, data=fake_3, refresh=0)
  fit_3b <- stan_glm(y ~ z + x, data=fake_3, refresh=0)
  inferences <- rbind(c(coef(fit_3a)["z"], se(fit_3a)["z"]), c(coef(fit_3b)["z"], se(fit_3b)["z"]))
  return(inferences)
}

n <- 100
n_loop <- 50
results <- array(NA, c(n_loop, 2, 2), dimnames=list(1:n_loop, c("Simple", "Adjusted"), c("Estimate", "SE")))
for (loop in 1:n_loop){
  results[loop,,] <- Experiment(n)
}
results_avg <- apply(results, c(2,3), mean)
LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogRmFrZSBkYXRhc2V0IG9mIGEgcmFuZG9taXplZCBleHBlcmltZW50IG9uIHN0dWRlbnQgZ3JhZGVzIgphdXRob3I6ICJBbmRyZXcgR2VsbWFuLCBKZW5uaWZlciBIaWxsLCBBa2kgVmVodGFyaSIKZGF0ZTogImByIGZvcm1hdChTeXMuRGF0ZSgpKWAiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMgogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQpGYWtlIGRhdGFzZXQgb2YgYSByYW5kb21pemVkIGV4cGVyaW1lbnQgb24gc3R1ZGVudCBncmFkZXMuIFNlZQpDaGFwdGVyIDE2IGluIFJlZ3Jlc3Npb24gYW5kIE90aGVyIFN0b3JpZXMuCgotLS0tLS0tLS0tLS0tCgoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRSwgY29tbWVudD1OQSkKIyBzd2l0Y2ggdGhpcyB0byBUUlVFIHRvIHNhdmUgZmlndXJlcyBpbiBzZXBhcmF0ZSBmaWxlcwpzYXZlZmlncyA8LSBGQUxTRQpgYGAKCiMjIyMgTG9hZCBwYWNrYWdlcwoKYGBge3IgfQpsaWJyYXJ5KCJyc3RhbmFybSIpCmBgYAoKIyMjIyBGaXJzdCBzaW11bGF0aW9uCgpgYGB7ciB9Cm4gPC0gMTAwCnlfaWZfY29udHJvbCA8LSBybm9ybShuLCA2MCwgMjApCnlfaWZfdHJlYXRlZCA8LSB5X2lmX2NvbnRyb2wgKyA1CnogPC0gc2FtcGxlKHJlcChjKDAsMSksIG4vMikpCnkgPC0gaWZlbHNlKHo9PTEsIHlfaWZfdHJlYXRlZCwgeV9pZl9jb250cm9sKQpmYWtlIDwtIGRhdGEuZnJhbWUoeSwgeikKZGlmZiA8LSBtZWFuKHlbej09MV0pIC0gbWVhbih5W3o9PTBdKQpzZV9kaWZmIDwtIHNxcnQoc2QoeVt6PT0wXSleMi9zdW0oej09MCkgKyBzZCh5W3o9PTFdKV4yL3N1bSh6PT0xKSkKcHJpbnQoYyhkaWZmLCBzZV9kaWZmKSwgZGlnaXRzPTIpCmZpdF8xYSA8LSBzdGFuX2dsbSh5IH4geiwgZGF0YT1mYWtlLCByZWZyZXNoPTApCnByaW50KGZpdF8xYSkKCmZha2UkeCA8LSBybm9ybShuLCA1MCwgMjApCmZpdF8xYiA8LSBzdGFuX2dsbSh5IH4geiArIHgsIGRhdGE9ZmFrZSwgcmVmcmVzaD0wKQpwcmludChmaXRfMWIpCmBgYAoKIyMjIyBTaW11bGF0aW9uIHdpdGggcmVhbGlzdGljIHByZS10ZXN0CgpgYGB7ciB9Cm4gPC0gMTAwCnRydWVfYWJpbGl0eSA8LSBybm9ybShuLCA1MCwgMTYpCnggPC0gdHJ1ZV9hYmlsaXR5ICsgcm5vcm0obiwgMCwgMTIpCnlfaWZfY29udHJvbCA8LSB0cnVlX2FiaWxpdHkgKyBybm9ybShuLCAwLCAxMikgKyAxMAp5X2lmX3RyZWF0ZWQgPC0geV9pZl9jb250cm9sICsgNQp6IDwtIHNhbXBsZShyZXAoYygwLDEpLCBuLzIpKQp5IDwtIGlmZWxzZSh6PT0xLCB5X2lmX3RyZWF0ZWQsIHlfaWZfY29udHJvbCkKZmFrZV8yIDwtIGRhdGEuZnJhbWUoeCwgeSwgeikKZml0XzJhIDwtIHN0YW5fZ2xtKHkgfiB6LCBkYXRhPWZha2VfMiwgcmVmcmVzaD0wKQpmaXRfMmIgPC0gc3Rhbl9nbG0oeSB+IHogKyB4LCBkYXRhPWZha2VfMiwgcmVmcmVzaD0wKQpwcmludChmaXRfMmEpCnByaW50KGZpdF8yYikKYGBgCgojIyMjIFNpbXVsYXRpbmcgc2VsZWN0aW9uIGJpYXMgaW4gdHJlYXRtZW50IGFzc2lnbm1lbnQKCmBgYHtyIH0KaW52bG9naXQgPC0gcGxvZ2lzCnogPC0gcmJpbm9tKG4sIDEsIGludmxvZ2l0KC0oeC01MCkvMjApKQoKcGRmKCJkZXNpZ25fYmlhcy5wZGYiLCBoZWlnaHQ9NCwgd2lkdGg9NikKcGFyKG1hcj1jKDMsIDMsIDIsIDEpLCBtZ3A9YygxLjcsIC41LCAwKSwgdGNrPS0uMDEpCnBsb3QoeCwgaWZlbHNlKHo9PTEsIC45OSwgLjAxKSwgeWxpbT1jKDAsMSksIHhsYWI9IlByZS10ZXN0IHNjb3JlIiwgeWxhYj0iUHIgKHo9MSkiLCB4YXh0PSJuIiwgeWF4dD0ibiIsIHlheHM9ImkiLCBidHk9ImwiLCBwY2g9MjApCmF4aXMoMSwgc2VxKDAsMTAwLDIwKSkKYXhpcygyLCBzZXEoMCwxLC41KSkKdGV4dCg0NSwgMC45MywgIihhc3NpZ25lZCB0byB0cmVhdG1lbnQgZ3JvdXAsIHo9MSkiLCBjb2w9ImdyYXk0MCIpCnRleHQoNTUsIDAuMDcsICIoYXNzaWduZWQgdG8gY29udHJvbCBncm91cCwgej0wKSIsIGNvbD0iZ3JheTQwIikKY3VydmUoaW52bG9naXQoLSh4LTUwKS8yMCksIGFkZD1UUlVFKQpkZXYub2ZmKCkKCnkgPC0gaWZlbHNlKHo9PTEsIHlfaWZfdHJlYXRlZCwgeV9pZl9jb250cm9sKQpmYWtlXzMgPC0gZGF0YS5mcmFtZSh4LCB5LCB6KQoKRXhwZXJpbWVudCA8LSBmdW5jdGlvbihuKSB7CiAgdHJ1ZV9hYmlsaXR5IDwtIHJub3JtKG4sIDUwLCAxNikKICB4IDwtIHRydWVfYWJpbGl0eSArIHJub3JtKG4sIDAsIDEyKQogIHlfaWZfY29udHJvbCA8LSB0cnVlX2FiaWxpdHkgKyBybm9ybShuLCAwLCAxMikgKyAxMAogIHlfaWZfdHJlYXRlZCA8LSB5X2lmX2NvbnRyb2wgKyA1CiAgeiA8LSByYmlub20obiwgMSwgaW52bG9naXQoLSh4LTUwKS8yMCkpCiAgeSA8LSBpZmVsc2Uoej09MSwgeV9pZl90cmVhdGVkLCB5X2lmX2NvbnRyb2wpCiAgZmFrZV8zIDwtIGRhdGEuZnJhbWUoeCwgeSwgeikKICBmaXRfM2EgPC0gc3Rhbl9nbG0oeSB+IHosIGRhdGE9ZmFrZV8zLCByZWZyZXNoPTApCiAgZml0XzNiIDwtIHN0YW5fZ2xtKHkgfiB6ICsgeCwgZGF0YT1mYWtlXzMsIHJlZnJlc2g9MCkKICBpbmZlcmVuY2VzIDwtIHJiaW5kKGMoY29lZihmaXRfM2EpWyJ6Il0sIHNlKGZpdF8zYSlbInoiXSksIGMoY29lZihmaXRfM2IpWyJ6Il0sIHNlKGZpdF8zYilbInoiXSkpCiAgcmV0dXJuKGluZmVyZW5jZXMpCn0KCm4gPC0gMTAwCm5fbG9vcCA8LSA1MApyZXN1bHRzIDwtIGFycmF5KE5BLCBjKG5fbG9vcCwgMiwgMiksIGRpbW5hbWVzPWxpc3QoMTpuX2xvb3AsIGMoIlNpbXBsZSIsICJBZGp1c3RlZCIpLCBjKCJFc3RpbWF0ZSIsICJTRSIpKSkKZm9yIChsb29wIGluIDE6bl9sb29wKXsKICByZXN1bHRzW2xvb3AsLF0gPC0gRXhwZXJpbWVudChuKQp9CnJlc3VsdHNfYXZnIDwtIGFwcGx5KHJlc3VsdHMsIGMoMiwzKSwgbWVhbikKYGBgCgo=