Code and figures for ChileSchools example. See Chapter 21 in Regression and Other Stories.
Data are from
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("foreign")
library("arm")
library("rstanarm")
library("brms")
library("ivpack")
The outcomes in these analyses are the gain scores between 88 and 92.
chile <- read.csv(root("ChileSchools/data","chile.csv"))
print(head(chile), digits=3)
p90 cmb_regn urban88 rule2 cutoff cutoff_cmb eligible read92 read88
1 0 Regions 2,5,10 1 3.415 49.4 49.4 0 57.0 54.4
2 0 Regions 6,8 1 31.815 43.4 43.4 0 85.5 79.1
3 0 Regions 6,8 1 4.445 43.4 43.4 0 52.0 47.8
4 0 Region 13 1 16.800 46.4 46.4 0 66.4 65.1
5 0 Regions 2,5,10 1 0.095 49.4 49.4 0 52.5 49.3
6 1 Region 13 1 -2.030 46.4 46.4 1 55.3 50.5
math92 math88
1 67.1 51.3
2 84.7 71.4
3 53.3 47.9
4 56.4 61.3
5 50.9 49.7
6 59.0 38.2
fit_1a <- stan_glm(read92 ~ eligible + rule2, data=chile, refresh=0)
fit_1b <- stan_glm(read92 ~ eligible + rule2, data=chile, subset = abs(rule2)<5, refresh=0)
fit_2b <- stan_glm(read92 ~ eligible + rule2 + eligible:rule2, data=chile, subset = abs(rule2)<5, refresh=0)
fit_3a <- stan_glm(read92 ~ eligible + rule2 + read88 + math88, data=chile, refresh=0)
fit_3b <- stan_glm(read92 ~ eligible + rule2 + read88 + math88, data=chile, subset = abs(rule2)<5, refresh=0)
chile$z_read88 <- (chile$read88 - mean(chile$read88))/sd(chile$read88)
chile$z_math88 <- (chile$math88 - mean(chile$math88))/sd(chile$math88)
fit_5a <- stan_glm(read92 ~ eligible + rule2 + z_read88 + z_math88 + eligible:z_read88, data=chile, refresh=0)
fit_5b <- stan_glm(read92 ~ eligible + rule2 + z_read88 + z_math88 + eligible:z_read88, data=chile, subset = abs(rule2)<5, refresh=0)
whiteline <- function(a, b, from=-1000, to=1000){
curve(a + b*x, from=from, to=to, col="white", lwd=4, add=TRUE)
curve(a + b*x, from=from, to=to, lwd=2, add=TRUE)
}
par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
yy <- chile$read92
xx <- chile$rule2
plot(y=yy, x=xx, type="n",
xlab="Assignment variable",
ylab="Post-test", bty="l", yaxt="n")
axis(2, seq(0,100,20))
points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
cex=.7,col="gray60")
points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
cex=.7)
whiteline(coef(fit_1a)[1], coef(fit_1a)[3], from=0)
whiteline(coef(fit_1a)[1] + coef(fit_1a)[2], coef(fit_1a)[3], to=0)
abline(v=0, lwd=4, col="gray")
mtext("All the data", 3, 1)
par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
yy <- chile$read92
xx <- chile$rule2
plot(y=yy, x=xx, type="n",
xlab="Assignment variable",
ylab="Post-test", bty="l", yaxt="n", xlim=c(-5,5), xaxs="i")
axis(2, seq(0,100,20))
points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
cex=.7,col="gray60")
points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
cex=.7)
whiteline(coef(fit_1b)[1], coef(fit_1b)[3], from=0)
whiteline(coef(fit_1b)[1] + coef(fit_1b)[2], coef(fit_1b)[3], to=0)
abline(v=0, lwd=4, col="gray")
mtext("Restricting to data near the cutoff", 3, 1)
par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
yy <- chile$read92
xx <- chile$rule2
plot(y=yy, x=xx, type="n",
xlab="Assignment variable",
ylab="Ppst-test", bty="l", yaxt="n", xlim=c(-5,5), xaxs="i")
axis(2, seq(0,100,20))
points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
cex=.7,col="gray60")
points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
cex=.7)
whiteline(coef(fit_2b)[1], coef(fit_2b)[3], from=0)
whiteline(coef(fit_2b)[1] + coef(fit_2b)[2], coef(fit_2b)[3] + coef(fit_2b)[4], to=0)
abline(v=0, lwd=4, col="gray")
mtext("Model with interaction", 3, 1)
par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
yy <- chile$read92 - coef(fit_3b)[4] * (chile$read88 - mean(chile$read88)) - coef(fit_3b)[5] * (chile$math88 - mean(chile$math88))
xx <- chile$rule2
plot(y=yy, x=xx, type="n",
xlab="Assignment variable",
ylab="Adjusted outcome", bty="l", yaxt="n", xlim=c(-5,5), xaxs="i")
axis(2, seq(-100,100,20))
points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
cex=.7,col="gray60")
points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
cex=.7)
whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88), coef(fit_3b)[3], from=0)
whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88) + coef(fit_3b)[2], coef(fit_3b)[3], to=0)
abline(v=0, lwd=4, col="gray")
mtext("Adjusting for pre-test scores", 3, 1)
par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
yy <- chile$read92 - coef(fit_3b)[4] * (chile$read88 - mean(chile$read88)) - coef(fit_3b)[5] * (chile$math88 - mean(chile$math88))
xx <- chile$rule2
n_bins <- 20
n <- length(xx)
halfwidth <- 5
cutpoints <- c(seq(-halfwidth, halfwidth, length=n_bins+1))
xx_bin <- rep(NA, n_bins)
yy_bin <- rep(NA, n_bins)
for (i in 1:n_bins){
keep <- xx > cutpoints[i] & xx <= cutpoints[i+1]
xx_bin[i] <- mean(xx[keep])
yy_bin[i] <- mean(yy[keep])
}
plot(y=yy_bin, x=xx_bin, type="n",
xlab="Assignment variable",
ylab="Adjusted outcome", bty="l", yaxt="n", xlim=c(-halfwidth, halfwidth), xaxs="i")
axis(2, seq(-100,100,1))
points(xx_bin[xx_bin>0], yy_bin[xx_bin>0],pch=20,
cex=2, col="gray60")
points(xx_bin[xx_bin<0], yy_bin[xx_bin<0],pch=20,
cex=2)
whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88), coef(fit_3b)[3], from=0)
whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88) + coef(fit_3b)[2], coef(fit_3b)[3], to=0)
abline(v=0, lwd=4, col="gray")
mtext("Binned averages with same regression lines", 3, 1)
mean(chile$p90[chile$eligible==0 & abs(chile$rule2) < 5]) # .053
[1] 0.05333333
mean(chile$p90[chile$eligible==1 & abs(chile$rule2) < 5]) # .606
[1] 0.6061453
# with brms as in IV section earlier in the chapter
set.seed(1234)
chile$diff_read92 <- chile$read92 - chile$read88
rd_f1 <- bf(p90 ~ eligible)
rd_f2 <- bf(diff_read92 ~ p90)
IV_brm_rd <- brm(formula=rd_f1 + rd_f2, data = chile[abs(chile$rule2) < 5,])
print(IV_brm_rd, digits=3)
Family: MV(gaussian, gaussian)
Links: mu = identity; sigma = identity
mu = identity; sigma = identity
Formula: p90 ~ eligible
diff_read92 ~ p90
Data: chile[abs(chile$rule2) < 5, ] (Number of observations: 883)
Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup samples = 4000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
p90_Intercept 0.054 0.016 0.023 0.084 1.000 4812
diffread92_Intercept 10.944 0.341 10.271 11.594 1.001 2611
p90_eligible 0.552 0.025 0.502 0.600 1.000 4484
diffread92_p90 5.065 0.876 3.347 6.749 1.002 2329
Tail_ESS
p90_Intercept 2873
diffread92_Intercept 2584
p90_eligible 2840
diffread92_p90 2602
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma_p90 0.357 0.009 0.341 0.375 1.001 4718 2808
sigma_diffread92 7.123 0.176 6.786 7.484 1.001 3478 3244
Residual Correlations:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
rescor(p90,diffread92) -0.182 0.053 -0.282 -0.073 1.001 2524
Tail_ESS
rescor(p90,diffread92) 2733
Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# t.e. is 5.08 with s.e. of .90
summary(ivreg(formula = diff_read92 ~ p90 | eligible, data=chile, subset = abs(rule2) < 5))
Call:
ivreg(formula = diff_read92 ~ p90 | eligible, data = chile, subset = abs(rule2) <
5)
Residuals:
Min 1Q Median 3Q Max
-33.8471 -4.3416 0.1009 4.6948 25.7337
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.9411 0.3421 31.981 < 2e-16 ***
p90 5.0871 0.8814 5.771 1.09e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.109 on 881 degrees of freedom
Multiple R-Squared: 0.009344, Adjusted R-squared: 0.00822
Wald test: 33.31 on 1 and 881 DF, p-value: 1.088e-08
# est is 5.09 with s.e. of .88