Code and figures for Infant Health and Development Program (IHDP) example. See Chapter 20 in Regression and Other Stories.
The intervention for low-birth-weight children is described by
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstan")
library("arm")
library("rstanarm")
library("survey")
source(root("Childcare/library","matching.R"))
source(root("Childcare/library","balance.R"))
source(root("Childcare/library","estimation.R"))
cc2 <- read.csv(root("Childcare/data","cc2.csv"))
head(cc2)
momage momrace b.marr momed work.dur prenatal cig booze sex first bw
1 33 3 1 4 1 1 0 0 1 0 1559
2 22 1 0 1 0 1 0 1 1 0 2240
3 13 1 0 1 0 1 0 0 1 1 1900
4 25 1 1 4 1 1 0 0 1 1 1550
5 19 1 0 1 0 1 1 0 1 0 2270
6 19 1 0 2 1 1 1 1 0 1 1550
preterm age dayskidh ppvtr.36 sample b.state st5 st9 st12 st25 st36 st42
1 10 60.79671 31 111 I 5 1 0 0 0 0 0
2 3 59.77823 4 81 I 5 1 0 0 0 0 0
3 6 59.51540 9 92 I 5 1 0 0 0 0 0
4 8 59.18686 50 103 I 5 1 0 0 0 0 0
5 5 58.79261 4 81 I 5 1 0 0 0 0 0
6 4 58.49692 13 94 I 5 1 0 0 0 0 0
st48 st53 income ineeds unemp.rt st99 treat treat0 b.8state black
1 0 0 42500 2.0083910 2 FALSE 1 1 5 0
2 0 0 5000 0.3665904 4 FALSE 1 1 5 1
3 0 0 12500 0.2660848 5 FALSE 1 1 5 1
4 0 0 42500 4.8643700 6 FALSE 1 1 5 1
5 0 0 5000 0.4463090 7 FALSE 1 1 5 1
6 0 0 12500 0.4463090 5 FALSE 1 1 5 1
hispanic white bs.by.aa lths hs ltcoll college v.yng young y.adult late20
1 0 1 105 0 0 0 1 0 0 0 0
2 0 0 205 1 0 0 0 0 0 1 0
3 0 0 205 1 0 0 0 1 0 0 0
4 0 0 205 0 0 0 1 0 0 0 1
5 0 0 205 1 0 0 0 0 1 0 0
6 0 0 205 0 1 0 0 0 1 0 0
older ptcat ptcat0 ptcat1 ptcat2 ptcat3 bwg ethnic educ educ3 state state2
1 1 3 0 0 0 1 0 3 4 3 1 0
2 0 2 0 0 1 0 1 2 1 1 1 0
3 0 3 0 0 0 1 0 2 1 1 1 0
4 0 3 0 0 0 1 0 2 4 3 1 0
5 0 3 0 0 0 1 1 2 1 1 1 0
6 0 2 0 0 1 0 0 2 2 2 1 0
state3 neg.bw no.prenatal b.unmarr bwT dayskidT pretermT momageT
1 1 941 0 0 3481 3.465736 324 1089
2 1 260 0 1 547600 1.609438 121 484
3 1 600 0 1 160000 2.302585 196 169
4 1 950 0 0 2500 3.931826 256 625
5 1 230 0 1 592900 1.609438 169 361
6 1 950 0 1 2500 2.639057 144 361
at age 3 with observations displayed in different colors.
par(mfrow=c(1,1))
tmp <- lm(ppvtr.36~bw+treat,data=cc2)$coef
plot(cc2$bw, cc2$ppvtr.36, xlab="birth weight", ylab="test score at age 3", mgp=c(2,.5,0), main="",type="n",xlim=c(1500,5000),cex.axis=.75,cex.lab=.8,lab=c(3,5,7),xaxt="n")
axis(side=1,at=c(2000,3000,4000,5000),cex.axis=.75)
points(cc2$bw[cc2$treat==0]+runif(sum(cc2$treat==0),-.5,5), cc2$ppvtr.36[cc2$treat==0], col="darkgrey", pch=20, cex=.3)
points(cc2$bw[cc2$treat==1]+runif(sum(cc2$treat==1),-.5,5), cc2$ppvtr.36[cc2$treat==1], pch=20, cex=.3)
curve(tmp[1]+tmp[2]*x,add=T,lty=2)
curve(tmp[1]+tmp[3]+tmp[2]*x,add=T)
par(mfrow=c(1,2))
hist(cc2$educ[cc2$treat==0],xlim=c(0,5),main="",border="darkgrey",breaks=c(.5,1.5,2.5,3.5,4.5),mgp=c(2,.5,0),xlab="mother's education",freq=TRUE)
hist(cc2$educ[cc2$treat==1],xlim=c(0,5),xlab="education",breaks=c(.5,1.5,2.5,3.5,4.5),freq=TRUE,add=T)
hist(cc2$age[cc2$treat==0],xlim=c(0,110),main="",xlab="age of child (months)",border="darkgrey",breaks=seq(0,110,10),mgp=c(2,.5,0),
freq=TRUE)
hist(cc2$age[cc2$treat==1],xlim=c(0,110),xlab="",breaks=seq(0,110,10),freq=TRUE, add=T)
edu <- list(cc2$lths, cc2$hs, cc2$ltcoll, cc2$college)
# mean difference
y.trt <- sapply(edu, FUN=function(x) {
tapply(cc2$ppvtr.36, list(cc2$treat, x), mean)[4]
})
y.ctrl <- sapply(edu, FUN=function(x) {
tapply(cc2$ppvtr.36, list(cc2$treat, x), mean)[3]
})
te <- y.trt - y.ctrl
# sample sizes
n.trt <- sapply(edu, FUN=function(x) {
tapply(rep(1, nrow(cc2)), list(cc2$treat, x), sum)[4]
})
n.ctrl <- sapply(edu, FUN=function(x) {
sum(tapply(rep(1, nrow(cc2)), list(cc2$treat, x), sum)[3])
})
# std errors
var.trt <- sapply(edu, FUN=function(x) {
tapply(cc2$ppvtr.36, list(cc2$treat, x), var)[4]
})
var.ctrl <- sapply(edu, FUN=function(x) {
tapply(cc2$ppvtr.36, list(cc2$treat, x), var)[3]
})
se <- sqrt(var.trt/n.trt + var.ctrl/n.ctrl)
tes <- matrix(c(te, n.trt, n.ctrl, se), nrow=4)
rownames(tes) <- c('lths', 'hs', 'ltcoll', 'college')
colnames(tes) <- c('te', 'n.trt', 'n.ctrl', 'se')
round(tes, 1)
te n.trt n.ctrl se
lths 9.3 126 1232 1.5
hs 4.1 82 1738 1.9
ltcoll 7.9 48 789 2.4
college 4.6 34 332 2.3
(9.3* 1358 + 4.1 * 1820 + 7.9* 837 + 4.6* 366) / (1358+1820+837+366)
[1] 6.479639
standard error
(1.5^2* 1358^2 + 1.9^2 * 1738^2 + 2.4^2* 789^2 + 2.3^2 * 366^2) / ((1358+1820+837+366)^2)
[1] 1.00808
round((9.3* 126 + 4.1 * 82 + 7.9* 48 + 4.6* 34) / (126+82+48+34), 1)
[1] 7
standard error
round((1.5^2* 126^2 + 1.9^2 * 82^2 + 2.4^2* 48^2 + 2.3^2 * 34^2) / ((126+82+48+34)^2),2)
[1] 0.94
these are the no redundancy covariates with and without state covariates
covs.nr <- c('bwg', 'hispanic', 'black', 'b.marr', 'lths', 'hs', 'ltcoll', 'work.dur', 'prenatal', 'sex', 'first', 'bw', 'preterm', 'momage', 'dayskidh')
covs.nr.st <- c(covs.nr, 'st5', 'st9', 'st12', 'st25', 'st36', 'st42', 'st53')
pscore estimation formula with original covariates
ps_spec <- formula(treat ~ bw + bwg + hispanic + black + b.marr + lths + hs + ltcoll + work.dur + prenatal + sex + first + preterm + momage + dayskidh + income)
pscore estimation formula with states added
ps_spec.st <- update(ps_spec, . ~ . + st5 + st9 + st12 + st25 + st36 + st42 + st48 + st53)
set.seed(1234)
ps_fit_1 <- stan_glm(ps_spec, family=binomial(link='logit'), data=cc2, algorithm='optimizing', refresh=0)
ps_fit_1.st <- stan_glm(ps_spec.st, family=binomial(link='logit'), data=cc2, algorithm='optimizing', refresh=0)
extracting (logit) pscores from the fit
pscores <- apply(posterior_linpred(ps_fit_1), 2, mean)
pscores.st <- apply(posterior_linpred(ps_fit_1.st), 2, mean)
matching without replacement, original formula
matches <- matching(z=cc2$treat, score=pscores, replace=FALSE)
matched <- cc2[matches$match.ind,]
matching with replacement, original formula
matches.wr <- matching(z=cc2$treat, score=pscores, replace=TRUE)
wts.wr <- matches.wr$cnts
matching without replacement, pscores that include state indicators
matches.st <- matching(z=cc2$treat, score=pscores.st, replace=FALSE)
matched.st <- cc2[matches.st$match.ind,]
matching with replacement, pscores that include state indicators
matches.st.wr <- matching(z=cc2$treat, score=pscores.st, replace=TRUE)
wts.st.wr <- matches.st.wr$cnts
Balance plots for all covariates specified in STEP 1 (in the book)
covs <- c('bw', 'preterm', 'dayskidh', 'sex', 'first', 'age', 'black', 'hispanic', 'white', 'b.marr', 'lths', 'hs', 'ltcoll', 'college', 'work.dur', 'prenatal', 'momage')
cov_names <- c('birth weight', 'weeks preterm', 'days in hospital', 'male', 'first born', 'age', 'black', 'hispanic', 'white', 'unmarried at birth', 'less than high school', 'high school graduate', 'some college', 'college graduate', 'worked during pregnancy', 'had no prenatal care', 'age at birth')
bal_nr <- balance(rawdata=cc2[,covs], treat=cc2$treat, matched=matches$cnts, estimand='ATT')
Balance diagnostics assume that the estimand is the ATT
bal_nr.wr <- balance(rawdata=cc2[,covs], treat=cc2$treat, matched=matches.wr$cnts, estimand='ATT')
Balance diagnostics assume that the estimand is the ATT
bal_nr.wr.st <- balance(rawdata=cc2[, union(covs, covs.nr.st)], treat=cc2$treat, matched=matches.st.wr$cnts, estimand='ATT')
Balance diagnostics assume that the estimand is the ATT
Labeled cov names, ps_fit_1 MwoR. No state variables included.
Balance for all covariates, not just those used in propensity score model
pts <- bal_nr$diff.means.raw[,4]
pts2 <- bal_nr$diff.means.matched[,4]
K <- length(pts)
idx <- 1:K
main <- 'Absolute Standardized Difference in Means'
mar <- c(8, 6, 6, 7)
par(mar=mar)
maxchar <- max(sapply(cov_names, nchar))
min.mar <- par('mar')
mar[2] <- max(min.mar[2], trunc(mar[2] + maxchar/10)) + mar[2] + 0.5
par(mar=mar)
pts <- rev(pts)
pts2 <- rev(pts2)
longcovnames <- rev(cov_names)
plot(c(pts,pts2), c(idx,idx),
bty='n', xlab='', ylab='',
xaxt='n', yaxt='n', type='n',
main=main, cex.main=1.2)
abline(v=0, lty=2)
points(pts, idx, cex=1)
points(pts2, idx, pch=19, cex=1)
axis(3)
axis(2, at=1:K, labels=longcovnames[1:K],
las=2, hadj=1, lty=0, cex.axis=1.2)
Separate balance plots for continuous and binary variables. Labelled cov names, ps_fit_1 MWR. Manual plot code based on the code in balance.R.
par(mfrow=c(1,2))
mar1 <- c(5, 4, 6, 2)
mar2 <- c(5, 3, 6, 4)
pts <- bal_nr.wr$diff.means.raw[bal_nr.wr$binary==TRUE,4]
pts2 <- bal_nr.wr$diff.means.matched[bal_nr.wr$binary==TRUE,4]
K <- length(pts)
idx <- 1:K
main <- 'Absolute Difference in Means'
longcovnames <- cov_names[bal_nr.wr$binary==TRUE]
mar <- mar1
par(mar=mar)
maxchar <- max(sapply(longcovnames, nchar))
min.mar <- par('mar')
mar[2] <- max(min.mar[2], trunc(mar[2] + maxchar/10)) + mar[2] + 0.5
par(mar=mar)
pts <- rev(pts)
pts2 <- rev(pts2)
longcovnames <- rev(longcovnames)
plot(c(pts,pts2), c(idx,idx),
bty='n', xlab='', ylab='',
xaxt='n', yaxt='n', type='n',
main=main, cex.main=1.2,
xlim=c(0,.55))
abline(v=0, lty=2)
points(pts, idx, cex=1)
points(pts2, idx, pch=19, cex=1)
axis(3, at=seq(0,.5,.1), xpd=TRUE)
axis(2, at=1:K, labels=longcovnames[1:K],
las=2, hadj=1, lty=0, cex.axis=1)
pts <- bal_nr.wr$diff.means.raw[bal_nr.wr$binary==FALSE,4]
pts2 <- bal_nr.wr$diff.means.matched[bal_nr.wr$binary==FALSE,4]
# AZC: hack to fix spacing of binary covariates against x axis
# the spacing of how spaced apart the ticks are changes as the number of covariates change. It's frustratingly hard, maybe impossible, to get the spacing to match between the continuous and binary plots with different number of covariates in each, so, I'll add fake data that won't show up
pts <- c(pts, rep(NA, 7))
pts2 <- c(pts2, rep(NA, 7))
K <- length(pts)
idx <- 1:K
main <- 'Absolute Standardized Difference in Means'
longcovnames <- cov_names[bal_nr.wr$binary==FALSE]
# add extra names to match above
longcovnames <- c(longcovnames, rep('', 7))
mar <- mar2
par(mar=mar)
maxchar <- max(sapply(longcovnames, nchar))
min.mar <- par('mar')
mar[2] <- max(min.mar[2], trunc(mar[2] + maxchar/10)) + mar[2] + 0.5
par(mar=mar)
pts <- rev(pts)
pts2 <- rev(pts2)
longcovnames <- rev(longcovnames)
plot(c(pts,pts2), c(idx,idx),
bty='n', xlab='', ylab='',
xaxt='n', yaxt='n', type='n',
main=main, cex.main=1.2)
segments(x0=0, y0=13, x1=0, y1=7.5, lty=2)
points(pts, idx, cex=1)
points(pts2, idx, pch=19, cex=1)
axis(3)
axis(2, at=8:12, labels=longcovnames[8:12],
las=2, hadj=1, lty=0, cex.axis=1)
Pscores from original model.
par(mfrow=c(1,2), cex.main=1.3, cex.lab=1.3)
# Plot the overlapping histograms for pscores before matching, density
par(mar=c(16,8,2,2))
hist(pscores[cc2$treat==0], xlim=c(-20,5), ylim=c(0,.28), main="before matching", border="darkgrey", mgp=c(2,.5,0), xlab="logit propensity scores", freq=FALSE)
hist(pscores[cc2$treat==1], freq=FALSE, add=TRUE)
# Plot the overlapping histograms for pscores after matching, frequency
par(mar=c(16,3,2,8))
hist(pscores[cc2[matches.wr$match.ind, 'treat']==0], xlim=c(-20,6), ylim=c(0,.28), main="after matching", border="darkgrey", mgp=c(2,.5,0), xlab="logit propensity scores", freq=FALSE)
hist(pscores[cc2[matches.wr$match.ind, 'treat']==1], freq=FALSE, add=TRUE)
how many pscores[cc2$treat==0] left out of plot?
sum(pscores[cc2$treat==0] < -20)
[1] 6
pscore matching check
sum(pscores[cc2$treat==1] > max(pscores[cc2$treat==0]))
[1] 13
set.seed(20)
ps3.mod <- glm(treat ~ unemp.rt, data=cc2,family=binomial)
pscores3 <- predict(ps3.mod, type="link")
par(mar=c(8,3,4,3), cex=1.4, cex.lab=1.2)
# Plot the overlapping histograms for pscore3, density
hist(pscores3[cc2$treat==0], xlim=range(pscores3), ylim=c(0,8),
main="", border="darkgrey",
mgp=c(2,.5,0), xlab="logit propensity scores",freq=FALSE)
hist(pscores3[cc2$treat==1], freq=FALSE, add=TRUE)
te_spec_nr <- update(ps_spec, ppvtr.36 ~ treat + .)
treatment effect without replacement
set.seed(20)
reg_ps <- stan_glm(te_spec_nr, data=cc2[matches$match.ind,], algorithm='optimizing')
treatment effect with replacement
set.seed(20)
reg_ps.wr <- stan_glm(te_spec_nr, data=cc2, weight=matches.wr$cnts, algorithm='optimizing')
ps_fit_1_design <- svydesign(ids=~1, weights=matches.wr$cnts, data=cc2)
reg_ps.wr_svy <- svyglm(te_spec_nr, design=ps_fit_1_design, data=cc2)
summary(reg_ps)['treat', 1:2]
Median MAD_SD
10.428777 1.518188
summary(reg_ps.wr)['treat', 1:2]
Median MAD_SD
11.378185 1.230367
summary(reg_ps.wr_svy)$coef['treat', 1:2]
Estimate Std. Error
9.699688 2.049842
Geographic information, covs_nr.st
te_spec_nr.st <- update(ps_spec.st, ppvtr.36 ~ treat + . + st5 + st9 + st12 + st25 + st36 + st42 + st48 + st53)
treatment effect without replacement
set.seed(20)
reg_ps.st <- stan_glm(te_spec_nr.st, data=cc2[matches.st$match.ind,], algorithm='optimizing')
treatment effect with replacement
set.seed(20)
reg_ps.st.wr <- stan_glm(te_spec_nr.st, data=cc2, weight=matches.st.wr$cnts, algorithm='optimizing')
ps_fit_1.st_design <- svydesign(ids=~1, weights=matches.st.wr$cnts, data=cc2)
reg_ps.st.wr_svy <- svyglm(te_spec_nr.st, design=ps_fit_1.st_design, data=cc2)
summary(reg_ps.st)['treat', 1:2]
Median MAD_SD
7.795391 2.030057
summary(reg_ps.st.wr)['treat', 1:2]
Median MAD_SD
11.360814 1.391956
summary(reg_ps.st.wr_svy)$coef['treat', 1:2]
Estimate Std. Error
2.133385 2.570435
standard regression estimate of treatment effect
set.seed(20)
reg_te <- stan_glm(te_spec_nr, data=cc2, algorithm='optimizing')
standard regression estimate of treatment effect with state
set.seed(20)
reg_te.st <- stan_glm(te_spec_nr.st, data=cc2, algorithm='optimizing')
summary(reg_te)['treat', 1:2]
Median MAD_SD
11.378185 1.230367
summary(reg_te.st)['treat', 1:2]
Median MAD_SD
11.360814 1.391956
Improved pscore model using an interaction or squared term.
transformed variables
cc2$bwT = (cc2$bw-1500)^2
cc2$dayskidT = log(cc2$dayskidh+1)
cc2$pretermT = (cc2$preterm+8)^2
cc2$momageT = (cc2$momage^2)
New ps-spec from psFitR(21)
ps_spec2 <- formula(treat ~ bwg*as.factor(educ) + as.factor(ethnic)*b.marr + work.dur + prenatal + preterm + momage + sex + first + bw + dayskidT + pretermT + momageT + black*(bw + preterm + dayskidT) + b.marr*(bw + preterm + dayskidT) + bw*income)
ps_spec_i21 <- formula(treat~bw+preterm+dayskidh+sex+hispanic+b.marr+lths+hs+ltcoll+work.dur+prenatal+momage+income+bwT+pretermT+income+black:dayskidT+b.marr:bw+b.marr:preterm+b.marr:dayskidT+bw:income)
ps_spec2 <- ps_spec_i21
set.seed(8)
ps_fit_2 <- stan_glm(ps_spec2, family=binomial(link="logit"), data=cc2, algorithm='optimizing')
pscores_2 <- apply(posterior_linpred(ps_fit_2, type='link'), 2, mean)
matches2 <- matching(z=cc2$treat, score=pscores_2, replace=FALSE)
matched2 <- cc2[matches2$match.ind,]
matches2_wr <- matching(z=cc2$treat, score=pscores_2, replace=TRUE)
matched2_wr <- cc2[matches2_wr$match.ind,]
bal_2 <- balance(rawdata=cc2[,covs], cc2$treat, matched=matches2$cnts, estimand='ATT')
Balance diagnostics assume that the estimand is the ATT
bal_2.wr <- balance(rawdata=cc2[,covs], cc2$treat, matched=matches2_wr$cnts, estimand='ATT')
Balance diagnostics assume that the estimand is the ATT
look at some balance plots
par(mfrow=c(1,1))
plot.balance(bal_nr, which.covs='cont', main='ps_fit_1')
4.68266 2.545888 0.9321339 0.162117 0.1177283
$raw
momage age dayskidh preterm bw
0.1177283 0.1621170 0.9321339 2.5458881 4.6826604
$matched
momage age dayskidh preterm bw
0.1573427 0.4290510 0.3774416 0.7148219 0.7720407
plot.balance(bal_2.wr, which.covs='cont', main='ps_fit_2')
4.68266 2.545888 0.9321339 0.162117 0.1177283
$raw
momage age dayskidh preterm bw
0.1177283 0.1621170 0.9321339 2.5458881 4.6826604
$matched
momage age dayskidh preterm bw
0.125639284 0.334463295 0.008587863 0.029032893 0.225210486
plot.balance(bal_nr, which.covs='binary', main='ps_fit_1')
0.01141867 0.0615902 0.2201435 0.1202918 0.09985165 0.2558391 0.1333339 0.1420764 0.02734514 0.03608763 0.03121992 0.03211676
$raw
prenatal work.dur college ltcoll hs lths b.marr
0.03211676 0.03121992 0.03608763 0.02734514 0.14207638 0.13333390 0.25583914
white hispanic black first sex
0.09985165 0.12029181 0.22014346 0.06159020 0.01141867
$matched
prenatal work.dur college ltcoll hs lths
0.020689655 0.031034483 0.055172414 0.006896552 0.072413793 0.024137931
b.marr white hispanic black first sex
0.089655172 0.010344828 0.065517241 0.075862069 0.024137931 0.020689655
plot.balance(bal_2.wr, which.covs='binary', main='ps_fit_2')
0.01141867 0.0615902 0.2201435 0.1202918 0.09985165 0.2558391 0.1333339 0.1420764 0.02734514 0.03608763 0.03121992 0.03211676
$raw
prenatal work.dur college ltcoll hs lths b.marr
0.03211676 0.03121992 0.03608763 0.02734514 0.14207638 0.13333390 0.25583914
white hispanic black first sex
0.09985165 0.12029181 0.22014346 0.06159020 0.01141867
$matched
prenatal work.dur college ltcoll hs lths b.marr
0.03103448 0.11379310 0.04482759 0.03793103 0.04137931 0.12413793 0.07931034
white hispanic black first sex
0.09310345 0.05172414 0.04137931 0.13103448 0.02068966
plot.balance(bal_2.wr, longcovnames=cov_names)
4.68266 2.545888 0.9321339 0.02280011 0.123041 0.162117 0.4395376 0.4132608 0.2031829 0.5157247 0.2685229 0.3149425 0.07345135 0.1119816 0.06335898 0.1549416 0.1177283
$raw
momage prenatal work.dur college ltcoll hs lths
0.11772830 0.15494161 0.06335898 0.11198164 0.07345135 0.31494246 0.26852288
b.marr white hispanic black age first sex
0.51572470 0.20318292 0.41326081 0.43953760 0.16211703 0.12304101 0.02280011
dayskidh preterm bw
0.93213392 2.54588810 4.68266037
$matched
momage prenatal work.dur college ltcoll hs
0.125639284 0.149720363 0.230936365 0.139102147 0.101885952 0.091726025
lths b.marr white hispanic black age
0.250003004 0.159875082 0.189451350 0.177697544 0.082617775 0.334463295
first sex dayskidh preterm bw
0.261772410 0.041311835 0.008587863 0.029032893 0.225210486
te_spec2 <- formula(ppvtr.36 ~ treat + hispanic + black + b.marr + lths + hs + ltcoll + work.dur + prenatal + momage + sex + first + preterm + dayskidh + bw + income)
te_spec2 <- te_spec_nr
set.seed(8)
# MwoR
reg_ps2 <- stan_glm(te_spec2, data=matched2, algorithm='optimizing')
# MwR
reg_ps2.design <- svydesign(ids=~1, weights=matches2_wr$cnts, data=cc2)
reg_ps2.wr <- svyglm(te_spec2, design=reg_ps2.design, data=cc2)
summary(reg_ps2)['treat', 1:2]
Median MAD_SD
10.217023 1.707764
summary(reg_ps2.wr)$coef['treat', 1:2]
Estimate Std. Error
8.219256 2.376333
ps_spec2.st <- update(ps_spec2, . ~ . + st5 + st9 + st12 + st25 + st36 + st42 + st48 + st53)
set.seed(8)
ps_fit_2.st <- stan_glm(ps_spec2.st, family=binomial(link='logit'), data=cc2, algorithm='optimizing')
pscores_2.st <- apply(posterior_linpred(ps_fit_2.st, type='link'), 2, mean)
matches2.st <- matching(z=cc2$treat, score=pscores_2.st, replace=FALSE)
matched2.st <- cc2[matches2.st$match.ind,]
matches2.st_wr <- matching(z=cc2$treat, score=pscores_2.st, replace=TRUE)
te_spec2.st <- update(te_spec2, . ~ . + st5 + st9 + st12 + st25 + st36 + st42 + st48 + st53)
set.seed(8)
# MwoR
reg_ps2.st <- stan_glm(te_spec2.st, data=matched2.st, algorithm='optimizing')
reg_ps2.st_design <- svydesign(ids=~1, weights=matches2.st_wr$cnts, data=cc2)
reg_ps2.st.wr <- svyglm(te_spec2.st, design=reg_ps2.st_design, data=cc2)
summary(reg_ps2.st)['treat', 1:2]
Median MAD_SD
8.002963 2.114469
summary(reg_ps2.st.wr)$coef['treat', 1:2]
Estimate Std. Error
7.564377 2.185574
wt.iptw <- inv.logit(pscores) / (1 - inv.logit(pscores))
wt.iptw[cc2$treat==0] <- wt.iptw[cc2$treat==0] * (sum(wt.iptw[cc2$treat==0]) / sum(cc2$treat==0))
wt.iptw[cc2$treat==1] <- 1
set.seed(20)
ps_fit_iptw_design <- svydesign(ids=~1, weights=wt.iptw, data=cc2)
reg_ps.iptw <- svyglm(te_spec_nr, design=ps_fit_iptw_design, data=cc2)
summary(reg_ps.iptw)$coef['treat', 1:2]
Estimate Std. Error
8.358197 2.652154
Table of ratio of standard deviations across treatment & control groups for unmatched, MWOR, MWR.
cont_vars <- c('bw', 'preterm', 'dayskidh', 'momage', 'income', 'age')
sds.um <- sapply(cont_vars, function(x){
tapply(cc2[,x], cc2$treat, sd)
})
sds.mwor <- sapply(cont_vars, function(x){
tapply(matched[,x], matched$treat, sd)
})
mwr.ind <- rep(cc2$row.names, times=matches.wr$cnts)
matched.wr <- cc2[mwr.ind, ]
sds.mwr <- sapply(cont_vars, function(x){
tapply(matched.wr[,x], matched.wr$treat, sd)
})