Miscellaneous analyses using raw Pew data. See Chapter 2 in Regression and Other Stories.
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("foreign")
pew_pre <- read.dta(root("Pew/data","pew_research_center_june_elect_wknd_data.dta"))
n <- nrow(pew_pre)
table(pew_pre[,"date"])
61808 61908 62008 62108 62208 62308 62408 62508 62608 62708 62808
228 422 340 412 376 442 420 280 370 258 334
62908 72308 72408 72508 72608 72708 73108 80108 80208 80308 80408
126 484 942 502 654 424 360 584 508 478 524
80508 80608 80708 80808 80908 81008 81108 90908 91008 91108 91208
742 410 532 560 540 540 32 614 1420 1184 976
91308 91408 92708 92808 92908 100908 101008 101108 101208 101308 101608
970 800 1498 1246 266 376 384 433 285 7 874
101708 101808 101908 102308 102408 102508 102608 102908 103008 103108 110108
922 843 377 535 527 400 38 873 867 864 798
which_question <- ifelse(!is.na(pew_pre$heat2), 2, ifelse (!is.na(pew_pre$heat4), 4, 0))
table(pew_pre$date, which_question)
which_question
0 2 4
61808 26 202 0
61908 96 326 0
62008 66 274 0
62108 94 318 0
62208 82 294 0
62308 96 346 0
62408 82 338 0
62508 68 212 0
62608 90 280 0
62708 64 194 0
62808 60 274 0
62908 36 90 0
72308 84 400 0
72408 140 802 0
72508 86 416 0
72608 112 542 0
72708 102 322 0
73108 42 318 0
80108 86 498 0
80208 92 416 0
80308 70 408 0
80408 82 442 0
80508 134 608 0
80608 92 318 0
80708 90 442 0
80808 120 440 0
80908 90 450 0
81008 74 466 0
81108 10 22 0
90908 80 534 0
91008 182 1238 0
91108 202 982 0
91208 172 804 0
91308 148 822 0
91408 162 638 0
92708 250 1248 0
92808 210 1036 0
92908 34 232 0
100908 46 330 0
101008 55 329 0
101108 47 386 0
101208 58 227 0
101308 1 6 0
101608 114 760 0
101708 124 798 0
101808 114 729 0
101908 65 312 0
102308 66 0 469
102408 55 0 472
102508 47 0 353
102608 7 0 31
102908 0 0 873
103008 0 0 867
103108 0 0 864
110108 0 0 798
numeric_heat2 <- as.numeric(pew_pre$heat2)
numeric_heat4 <- as.numeric(pew_pre$heat4)
rvote <- rep (NA, n)
for (i in 1:n){
if (which_question[i]==2){
rvote[i] <- ifelse(numeric_heat2[i]==1, 1,
ifelse(numeric_heat2[i]==2, 0, NA))
}
else if (which_question[i]==4){
rvote[i] <- ifelse(numeric_heat4[i]==1, 1,
ifelse(numeric_heat4[i]==2, 0, NA))
}
}
registered <- ifelse(pew_pre$regicert=="absolutely certain", 1, 0)
registered[is.na(registered)] <- 0
Date
early <- pew_pre$date < 90008
late <- pew_pre$date > 90008
month <- floor(pew_pre$date/10000)
day <- floor(pew_pre$date/100) - 100*month
day.numeric <- month*31 + day
poll.id <- ifelse (month==6, 1,
ifelse (month==7 & day<28, 2,
ifelse ((month==7 & day>=28) | month==8, 3,
ifelse (month==9 & day<16, 4,
ifelse (month==9 & day>=16, 5,
ifelse (month==10 & day<14, 6,
ifelse (month==10 & day>=14 & day<20, 7,
ifelse (month==10 & day>=20 & day<27, 8, 9))))))))
n.poll.id <- max(poll.id)
State (1-51, in alfa order, including DC)
stnum <- as.numeric(pew_pre$state)
Identify out DC
state.abb.long <- c(state.abb[1:8], "DC", state.abb[9:50])
dc <- state.abb.long[stnum]=="DC"
Votes
votes08 <- c(60,39, 62,36, 54,45, 58,39, 40,59, 46,53, 39,60, 37,62, 7,93, 49,51, 53,46, 25,74, 61,36, 38,61, 49,50, 45,54, 57,42, 58,41, 59,40, 40,58, 39,60, 36,62, 42,56, 44,54, 57,43, 49,49, 50,47, 57,41, 41,57, 44,55, 42,57, 42,57, 37,62, 49,50, 53,45, 47,51, 66,34, 42,56, 44,55, 35,63, 54,45, 54,44, 57,42, 55,44, 62,35, 32,67, 48,52, 43,56, 56,43, 43,56, 65,33)
obama08 <- votes08[seq(2,102,2)]
mccain08 <- votes08[seq(1,101,2)]
ovote.actual <- obama08/(obama08+mccain08)
stnum <- as.numeric(pew_pre$state)
Weight
pop.weight0 <- pew_pre$weight
voter.weight0 <- ifelse(is.na(rvote) | registered==0, NA, pop.weight0)
pop.weight <- rep(NA, length(pop.weight0))
voter.weight <- rep(NA, length(voter.weight0))
for (i in 1:n.poll.id){
ok <- poll.id==i
pop.weight[ok] <- pop.weight0[ok]/mean(pop.weight0[ok])
voter.weight[ok] <- voter.weight0[ok]/mean(voter.weight0[ok],na.rm=TRUE)
}
voter.weight1 <- rep(1, length(voter.weight0))
for (i in 1:51){
ok <- stnum==i
if (sum(ok)>10){
sum.d <- sum(voter.weight[ok & rvote==0], na.rm=TRUE)
sum.r <- sum(voter.weight[ok & rvote==1], na.rm=TRUE)
ovote.sample <- sum.d/(sum.d+sum.r)
voter.weight1[ok & rvote==0] <- ovote.actual[i]/ovote.sample
voter.weight1[ok & rvote==1] <- (1-ovote.actual[i])/(1-ovote.sample)
}
}
voter.weight2 <- voter.weight*voter.weight1
Income (1-9 scale)
inc <- as.numeric(pew_pre$income)
inc[inc==10] <- NA
value.inc <- c(5,15,25,35,45,62.5,87.5,125,200)
n.inc <- max(inc, na.rm=TRUE)
Party id
pid0 <- as.numeric(pew_pre[,"party"])
lean0 <- as.numeric(pew_pre[,"partyln"])
pid <- ifelse(pid0==2, 5, # Republican
ifelse(pid0==3, 1, # Democrat
ifelse(lean0==2, 4, # Lean Republican
ifelse(lean0==4, 2, # Lean Democrat
3)))) # Independent
#1=Dem, 2=Lean Dem, 2=Ind, 4=Lean Rep, 5=Repub
pid.label <- c("Democrat", "Lean Dem.", "Independent", "Lean Rep.", "Republican")
n.pid <- max(pid, na.rm=TRUE)
Ideology
ideology0 <- as.numeric(pew_pre[,"ideo"])
ideology <- ifelse(ideology0==2, 5, # Very conservative
ifelse(ideology0==3, 4, # Conservative
ifelse(ideology0==6, 1, # Very liberal
ifelse(ideology0==5, 2, # Liberal
3)))) # Moderate
ideology.label <- c("Very liberal", "Liberal", "Moderate", "Conservative", "Very conservative")
n.ideology <- max(ideology, na.rm=TRUE)
par(mar=c(3,2,2.5,1), mgp=c(1.5,.7,0), tck=-.01)
plot(c(1,n.inc), c(0,1), xaxs="i", yaxs="i", type="n", xlab="", ylab="", xaxt="n", yaxt="n")
axis(1, 1:n.inc, rep("",n.inc))
axis(1, seq(1.5,n.inc-.5,length=3), c("Low income", "Middle income", "High income"), tck=0)
axis(2, c(0,.5,1), c(0,"50%","100%"))
center <- floor((1+n.inc)/2)
incprop <- array(NA, c(n.pid+1,n.inc))
incprop[n.pid+1,] <- 1
for (i in 1:n.pid){
for (j in 1:n.inc){
incprop[i,j] <- weighted.mean((pid<i)[inc==j], pop.weight0[inc==j], na.rm=TRUE)
}
}
for (i in 1:n.pid){
polygon(c(1:n.inc, n.inc:1), c(incprop[i,], rev(incprop[i+1,])), col=paste("gray", 40+10*i, sep=""))
lines(1:n.inc, incprop[i,])
text(center, mean(incprop[c(i,i+1),center]), pid.label[i])
}
mtext("Self-declared party identification, by income", side=3, line=1, cex=1.2)
par(mar=c(3,2,2.5,1), mgp=c(1.5,.7,0), tck=-.01)
plot(c(1,n.inc), c(0,1), xaxs="i", yaxs="i", type="n", xlab="", ylab="", xaxt="n", yaxt="n")
axis(1, 1:n.inc, rep("",n.inc))
axis(1, seq(1.5,n.inc-.5,length=3), c("Low income", "Middle income", "High income"), tck=0)
axis(2, c(0,.5,1), c(0,"50%","100%"))
center <- floor((1+n.inc)/2)
incprop <- array(NA, c(n.ideology+1,n.inc))
incprop[n.ideology+1,] <- 1
for (i in 1:n.ideology){
for (j in 1:n.inc){
incprop[i,j] <- weighted.mean((ideology<i)[inc==j], pop.weight0[inc==j], na.rm=TRUE)
}
}
for (i in 1:n.ideology){
polygon(c(1:n.inc, n.inc:1), c(incprop[i,], rev(incprop[i+1,])), col=paste("gray", 40+10*i, sep=""))
lines(1:n.inc, incprop[i,])
text(center, mean(incprop[c(i,i+1),center]), ideology.label[i])
}
mtext("Self-declared political ideology, by income", side=3, line=1, cex=1.2)
Recoded religious attendance (relatt) to 1-5 scale
relatt <- 7 - as.numeric(pew_pre$attend)
relatt[relatt==0] <- NA
relatt <- ifelse(relatt==1|relatt==2, 1,
ifelse(relatt==3, 2,
ifelse(relatt==4, 3,
ifelse(relatt==5, 4,
ifelse(relatt==6, 5, NA)))))
n.relatt <- max(relatt, na.rm=TRUE)
relatt.label <- c("Nonattenders","Rare attenders","Occasional\nattenders","Frequent attenders","Very frequent\nchurch attenders")
Categories
pid2 <- ifelse(pid==1, 1, ifelse(pid==5, 3, 2))
ideology2 <- ifelse(ideology==1|ideology==2, 1, ifelse(ideology==3, 2, 3))
pid2.label <- c("Democrats", "Independents", "Republicans")
ideology2.label <- c("Liberal", "Moderate", "Conservative")
prop <- array(NA, c(3,3,n.inc))
for (i in 1:3){
for (j in 1:3){
for (k in 1:n.inc){
prop[i,j,k] <- weighted.mean(pid2==i & ideology2==j & inc==k, pop.weight, na.rm=TRUE)
}
}
}
par(mfrow=c(3,3), oma=c(0,0,2.5,0))
par(mar=c(2,2,4,0), mgp=c(1.5,.7,0), tck=-.01)
for (i in 1:3){
for (j in 1:3){
plot(1:n.inc, prop[i,j,], type="l", bty="l", xaxs="i", yaxs="i", ylim=c(0,max(prop)),
xaxt="n", yaxt="n", xlab="", ylab="")
axis(1, 1:n.inc, rep("",n.inc))
axis(1, seq(2.7,n.inc-1.7,length=2), c("Low income", "High income"), tck=0, cex.axis=1.2)
axis(2, c(0,.01,.02), c("0","1%","2%"), cex.axis=1.2)
mtext(paste(ideology2.label[j], pid2.label[i]), side=3, line=1, cex=.9)
}
}
mtext("Income distributions within self-reported political categories in 2008",
outer=TRUE, side=3, line=.5, cex=1.2)