Predictive uncertainty for congressional elections. See Chapters 10 and 15 in Regression and Other Stories.
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
congress <- read.csv(root("Congress/data","congress.csv"))
inconsistent <- (congress$inc88==(-1) & congress$v86 > 0.5) | (congress$inc88==1 & congress$v86 < 0.5)
head(congress)
inc86 inc88 inc90 v86 v88 v90 v86_adj v88_adj v90_adj
1 1 1 1 0.7450362 0.7724427 0.7140294 0.7450362 0.7724427 0.7140294
2 1 1 1 0.6738455 0.6361816 0.5970501 0.6738455 0.6361816 0.5970501
3 1 1 0 0.6964566 0.6649283 0.5210433 0.6964566 0.6649283 0.5210433
4 -1 -1 -1 0.4645901 0.2738342 0.2343770 0.4645901 0.2738342 0.2343770
5 -1 -1 0 0.3910945 0.2636131 0.4774393 0.3910945 0.2636131 0.4774393
6 -1 -1 -1 0.3582454 0.3341927 0.2562970 0.3582454 0.3341927 0.2562970
data88 <- data.frame(vote=congress$v88_adj, past_vote=congress$v86_adj, inc=congress$inc88)
fit88 <- stan_glm(vote ~ past_vote + inc, data=data88, refresh=0)
print(fit88, digits=2)
stan_glm
family: gaussian [identity]
formula: vote ~ past_vote + inc
observations: 435
predictors: 3
------
Median MAD_SD
(Intercept) 0.24 0.02
past_vote 0.52 0.03
inc 0.10 0.01
Auxiliary parameter(s):
Median MAD_SD
sigma 0.07 0.00
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
data90 <- data.frame(past_vote=congress$v88_adj, inc=congress$inc90)
pred90 <- posterior_predict(fit88, newdata=data90)
dems_pred <- rowSums(pred90 > 0.5)
Alternately calculate that sum in a loop
n_sims <- 4000
dems_pred <- rep(NA, n_sims)
for (s in 1:n_sims) {
dems_pred[s] <- sum(pred90[s,] > 0.5)
}
print(c(mean(dems_pred), sd(dems_pred)), digits=2)
[1] 260.0 2.5
hist(dems_pred)
par(mar=c(3,0,1,0), mgp=c(1.5, .5, 0), tck=-.01)
v88_hist <- ifelse(congress$v88<.1, .0001, ifelse(congress$v88>.9, .9999, congress$v88))
hist(v88_hist, breaks=seq(0,1,.05),
xlab="Democratic share of the two-party vote", ylab="", yaxt="n", main="")
mtext("Congressional elections in 1988", 3, 0)
if (savefigs) dev.off()
if (savefigs) pdf("cong.pdf", height=4, width=4, colormodel="gray")
par(pty="s")
par(mar=c(3,0,3,0), mgp=c(1.7, .5, 0), tck=-.01)
plot(0, 0, xlim=c(0,1), ylim=c(0,1), type="n", xaxs="i", yaxs="i",
xlab="Democratic vote share in 1986", ylab="Democratic vote share in 1988")
abline(0,1, lwd=.5)
jitt <- function(vote){
n <- length(vote)
ifelse(vote<0.1, runif(n, 0.01, 0.04), ifelse(vote>0.9, runif(n, 0.96, 0.99), vote))
}
j_v86 <- jitt(congress$v86)
j_v88 <- jitt(congress$v88)
points(j_v86[congress$inc88==0], j_v88[congress$inc88==0], pch=1, cex=1)
points(j_v86[congress$inc88==1], j_v88[congress$inc88==1], pch=16, cex=.8)
points(j_v86[congress$inc88==-1], j_v88[congress$inc88==-1], pch=4, cex=.8)
mtext("Raw data", 3, .7)
par(pty="s")
par(mar=c(3,0,3,0), mgp=c(1.7, .5, 0), tck=-.01)
plot(0, 0, xlim=c(0,1), ylim=c(0,1), type="n", xaxs="i", yaxs="i",
xlab="Adjusted Dem. vote share in 1986", ylab="Adjusted Dem. vote share in 1988")
abline(0,1, lwd=.5)
points(congress$v86_adj[congress$inc88==0], congress$v88_adj[congress$inc88==0], pch=1, cex=1)
points(congress$v86_adj[congress$inc88==1], congress$v88_adj[congress$inc88==1], pch=16, cex=.8)
points(congress$v86_adj[congress$inc88==-1], congress$v88_adj[congress$inc88==-1], pch=4, cex=.8)
mtext("Adjusted data", 3, .7)