install.packages("MatchIt") install.packages("cem") library(MatchIt) library(cem) library(Zelig) ## LOAD DATA: Lalonde (1986) dataset; from cem package data(LL) # this isn't the fulle Lalonde dataset, it's a small subset # Naive calculation of the ATE mean(LL$re78[LL$treated == 1]) - mean(LL$re78[LL$treated == 0]) # Regression estimation of the ATE summary(lm(re78 ~ treated + age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL)) require(xtable) xtable(lm(re78 ~ treated + age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL)) # Check balance test <- t.test(LL$age[LL$treat == 1], LL$age[LL$treat == 0]) test$estimate test$p.value pre.imbalance <- imbalance(group=LL$treated, data=LL, drop=c("treated","re78")) pre.imbalance # Exact matching exact.match <- matchit(formula= treated ~ age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL, method = "exact") exact.data <- match.data(exact.match) head(exact.data) exact.model <- lm(re78 ~ treated+ age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = exact.data, weights=exact.data$weights) summary(exact.model) y.treat <- weighted.mean(exact.data$re78[exact.data$treated == 1], exact.data$weights[exact.data$treated == 1]) y.cont <- weighted.mean(exact.data$re78[exact.data$treated == 0], exact.data$weights[exact.data$treated == 0]) y.treat - y.cont ## Propensity score calculation pscores.logit <- glm(treated ~ age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, family = "binomial", data = LL) fittedvalues <- pscores.logit$fitted pscore.treat <- fittedvalues[LL$treated == 1] pscore.control <- fittedvalues[LL$treated == 0] nearest.match <- matchit(formula = treated ~ age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL, method = "nearest", distance = "logit", discard="control") data.matched <- match.data(nearest.match) imbalance(group=data.matched$treated, data=data.matched, drop=c("treated", "re78", "distance", "weights")) pre.balance <- summary(nearest.match)$sum.all post.balance <- summary(nearest.match)$sum.match # Balance graph plot(x = (pre.balance[,1] - pre.balance[,2]) / pre.balance[,3], y = 1:nrow(pre.balance), pch = 19, col = "black", xlab = "Standardized Imbalance", axes = FALSE, xlim = c(-.25,.25), ylab = "") points(x = (post.balance[,1] - post.balance[,2]) / post.balance[,3], y = 1:nrow(post.balance), pch = 19, col = "green") abline(v = 0, lty = "dashed") axis(1) axis(2, at = 11:1, labels = rownames(pre.balance), las = 1, cex.axis = .8) arrows(x0 = (pre.balance[,1] - pre.balance[,2]) / pre.balance[,3], y0 = 1:nrow(post.balance), x1 = (post.balance[,1] - post.balance[,2]) / post.balance[,3], y1 = 1:nrow(post.balance), length = .1) legend("topright", legend = c("Pre-match","Post-match"), pch = c(19,19), col = c("black","green"), cex = .8, bty = "n") nearest.data <- match.data(nearest.match) mean(nearest.data$re78[nearest.data$treated == 1]) - mean(nearest.data$re78[nearest.data$treated == 0]) nearest.model <- lm(re78 ~ treated + age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = nearest.data) ################# ## CEM auto.match <- matchit(formula = treated ~ age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL, method = "cem") re74cut <- seq(0, 40000, 5000) re75cut <- seq(0, max(LL$re75)+1000, by=1000) agecut <- c(20.5, 25.5, 30.5,35.5,40.5) my.cutpoints <- list(re75=re75cut, re74=re74cut, age=agecut) user.match <- matchit(formula = treated ~ age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL, method = "cem", cutpoints = my.cutpoints) auto.imb <- imbalance(group=auto.data$treated, data=auto.data, drop=c("treated","re78","distance", "weights","subclass")) user.imb <- imbalance(group=user.data$treated, data=user.data, drop=c("treated","re78","distance", "weights","subclass")) auto.imb user.imb summary(auto.match)$nn summary(user.match)$nn cem.match <- cem(treatment = "treated", data = LL, drop = "re78", cutpoints = my.cutpoints) cem.match.att <- att(obj=cem.match, formula=re78 ~ treated + age + education + black + married + nodegree + re74 + re75 + hispanic + u74 + u75, data = LL, model="linear") cem.match.att cem.match2 <- cem(treatment = "treated", data = LL, drop = "re78", cutpoints = my.cutpoints) cem.match.att2 <- att(obj=cem.match2, formula=re78 ~ treated + age + education + black + married + nodegree + re74+re75 + hispanic + u74 + u75, data = LL, model="linear") cem.match.att2 ###################### ## Frontier install.packages('devtools', repos = 'http://cran.us.r-project.org') library(devtools) install_github('MatchingFrontier', username = 'ChristopherLucas') library(MatchingFrontier) mydataset <- LL mytreatment <- "treated" mydrops <- c("treated","re78") myfrontier <- makeFrontier(mytreatment, mydataset, drop = mydrops, QOI = 'FSATT', metric = 'Mahal', ratio = 'variable') myests <- frontierEst(myfrontier, mydataset, myform = formula('re78 ~ treated'), treatment = mytreatment) frontierMultiPlot(myfrontier, mydataset, myests)