\documentclass[landscape]{sciposter}

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{multicol}
\usepackage{bm}
\usepackage{hyperref}
\usepackage{natbib}

% Poster colour palette.
% The first four names are the colour slots documented for the poster class
% (defaults as listed); they are redefined here to set the poster's look:
%   mainCol     background colour        (default white)
%   TextCol     normal text colour       (default black)
%   SectionCol  section header colour    (default black)
%   BoxCol      section box colour       (default light grey)
\definecolor{mainCol}{rgb}{1,1,1}
\definecolor{BoxCol}{rgb}{0.9,.9,1}
\definecolor{TextCol}{rgb}{0,0,0}
\definecolor{SectionCol}{rgb}{0,0,0}

% Accent colours used directly in the body via {\color{...}}:
% Blue for the bold item headings, Red for the "PROBLEM:" labels and emphasis.
\definecolor{Blue}{rgb}{0.3,0.3,0.9}
\definecolor{Red}{rgb}{1,0,0}

%% Invisible struts for adding vertical breathing room to table rows.
% \T ("top"): zero-width rule 2.6ex tall, pushes the row's upper edge up.
\newcommand*{\T}{\rule{0pt}{2.6ex}}
% \B ("bottom"): zero-size rule lowered by 1.2ex, pushes the row's lower edge down.
\newcommand*{\B}{\rule[-1.2ex]{0pt}{0pt}}

% === text colors===
\usepackage{color}
% `darkblue' is the colour used for every kind of hyperlink below.  It is not
% predefined by color/hyperref, so it must be declared here; otherwise the
% first real link (\url, \cite, \ref, ...) aborts with "Undefined color".
\definecolor{darkblue}{rgb}{0,0,0.55}
% Coloured (not boxed) links that may break across lines; open the PDF
% fitted to page width with no side panel.  The PDF page-mode name for
% "no panel" is UseNone (plain `None' is not a valid PageMode value).
\hypersetup{colorlinks,breaklinks,
            linkcolor=darkblue,urlcolor=darkblue,
            anchorcolor=darkblue,citecolor=darkblue,
            pdfstartview={FitH},pdfpagemode=UseNone}

%% sciposter documentation available at:
%http://www.tex.ac.uk/tex-archive/macros/latex/contrib/sciposter/scipostermanual.pdf

\title{Optimally Selecting Matched Samples}
%% A Graphical Method for Optimally Selecting Matched Datasets

\author{Rich Nielsen$^{1,3}$\\
%web: http://people.fas.harvard.edu/$\sim$rnielsen/\\
(portions from joint work with Gary King$^1$, Aaron Wells$^2$, Carter Coberley$^2$, and James Pope$^2$)}

\institute{$^1$Harvard University, $^2$Healthways, Inc., $^3$web: http://people.fas.harvard.edu/$\sim$rnielsen/}

%\email{\url{http://people.fas.harvard.edu/~rnielsen/}} 

\begin{document}


\conference{Prepared for Polmeth XXVII, the 27th Annual Summer Conference of the Society for Political Methodology, 22--24 July 2010, University of Iowa}



\maketitle

\begin{multicols}{3}

\section*{\Large{Overview}}



%\begin{large} 
\medskip
%\begin{itemize}
 %\item Existing practices for choosing matching solutions may be sub-optimal.
%\medskip
 %\item We present a simple graphical method for comparing many matched samples and selecting the best one(s).
%\medskip
% \item We illustrate the utility of our approach by showing that propensity scores matching with calipers gives poorer results as the number of covariates grows.

%\end{itemize}
\begin{large} 
We apply a new and simple graphical method (the ``space graph''; Iacus, King, and Porro, 2010)  for evaluating many matched samples and selecting the best one(s).  We then use this technique to reveal patterns in the relative performance of matching methods across data sets.  We also identify an important and previously unnoticed problem that causes propensity score matching with calipers to fail in precisely the applications for which it was designed.
\end{large}
\bigskip

\section*{\Large{Current Matching Practices}}

\begin{large}
\bigskip
In the applied matching literature, current practice is to:
\vspace{6pt}\\
\begin{enumerate}
 \item Choose a single matching method (rather than comparing methods).\\
\vspace{4pt}\\
{\color{Red} PROBLEM:}  Matching methods may work better or worse on a given dataset.  A related problem is the \textbf{LaLonde Data Fallacy} --- just because a matching method works well on the LaLonde data doesn't mean that it necessarily works well on \textit{your} data.\\
\medskip
  \item Tune the matching method unsystematically.\\
\vspace{4pt}\\
{\color{Red} PROBLEM:}  Unsystematic searches rarely identify optimal matched samples.\\
\medskip
  \item Assess balance using the means of the covariates.\\
\vspace{4pt}\\
{\color{Red} PROBLEM:}  Ignores higher moments and interactions.\\
\medskip
%  \item Report the results from one matching solution.\\
%\vspace{7pt}\\
%{\color{Red} PROBLEM:  If it is not clear which matching solution is best, then results may be sensitive to the choice of matching solutions}\\
\end{enumerate}







\section*{\Large{Solution: Compare Matching Solutions Graphically}}

\medskip


\begin{itemize}
 \item \Large{\textbf{{\color{Blue}Matching is a Clustering Problem.}}}
\vspace{5pt}\\
  Matching clusters observations into two groups, \textit{matched} and \textit{discarded}.  Better solutions have good balance with many observations matched.  We could find the best solutions by searching over all possible clusters of the data, but that is a very large number of clusters.
\medskip

\item \Large{\textbf{{\color{Blue}Matching Methods Search the Clusters Efficiently.}}}
\vspace{5pt}\\
  Existing (and future) matching methods are all ways of identifying relatively good clusters much more efficiently than random clustering.

\end{itemize}



%% Use Adobe pro to blow the original 6 by 6 figure, 225%
%% Figure made with /nfs/home/R/rnielsen/shared_space/rewards/healthways/aidshocks/plot_16jul2010.R
%\bigskip
%\begin{center}
 %\Large\textbf{Comparing Matching Solutions using $\mathcal{L}_1$ Balance}\\
 \includegraphics{aidshocksSpacegraph_16jul2010LARGE.pdf}\\
%\begin{footnotesize}                                          
%\end{footnotesize}
%\end{center}
%\smallskip

%\bigskip
%\begin{center}
 %\Large\textbf{Comparing Matching Solutions using Mean Balance}\\
% \includegraphics{aidshocksSpacegraphMdiff_16jul2010.pdf}\\
%\begin{footnotesize}                                          
%\end{footnotesize}
%\end{center}
%\bigskip

\begin{itemize}
\item \Large{\textbf{{\color{Blue}Compare Solutions on a Spacegraph.}}}
%\vspace{5pt}\\

  The x-axis is the number of matched observations and the y-axis is the balance.  To measure balance, we use $\mathcal{L}_1$ (Iacus, King, and Porro, 2009), which accounts for higher moments and interactions.

\begin{equation*}
\mathcal{L}_1(f,g;H) = \frac{1}{2}\sum_{\ell_1\cdots\ell_k \in H(\mathbf{X})} \lvert f_{\ell_1\cdots\ell_k} - g_{\ell_1\cdots\ell_k} \rvert .
\end{equation*}
We obtain similar results with other common measures of balance.
\vspace{3pt}\\

\item \Large{\textbf{{\color{Blue}Choose Solutions on the Frontier.}}}\\
%\vspace{5pt}\\
  %There is a bias variance trade-off --- better balance can only be achieved by discarding more observations.  
We identify the \textit{frontier} --- a lower bound of achievable balance at a given sample size.  Only solutions on this frontier are defensible choices. %, but any solutions not on the frontier should not be chosen.
\bigskip

%\item At any given sample size, only one matching solution only one solution is 
\end{itemize}

%\vspace{20pt}




\section*{\Large{Application: Discovery of a Propensity Score Paradox}}
\medskip

\begin{itemize}
\item \Large{\textbf{{\color{Blue}Calipers can \textit{{\color{Red}decrease}} propensity score balance.}}}
\vspace{4pt}\\
If the propensity score works as expected, stricter calipers should lead to better balance.  We find the opposite in reasonably low dimensions for both real data (to the left) and simulated data (below).
\vspace{5pt}\\

\end{itemize}

%\begin{itemize}
%\item This paradox holds for many real-world and simulated datasets.
%\item This paradox holds whether we use $\mathcal{L}_1$ or mean balance.
%\item This means that propensity scores have poor properties when applied to some datasets.
%\end{itemize}


%The paradox arises because of the curse of dimensionality.  We show this with simulated data as follows:\\



\textbf{Simulation: Data that Meets the Assumptions of the Propensity Score}
\end{large}
\texttt{
\begin{enumerate}
\item Draw $\mathbf{X}$, a matrix of 1000 observations with $k$ covariates\\ ($k = 1,\ldots, 9$).  The covariates are drawn from standardized\\ multivariate normal distributions with correlations of 0.2.
\item Assign treatment: $\textrm{P}(\mathit{treat}) = \mathbf{X^T}1 + \epsilon$  with  $\epsilon \sim \mathcal{N}(0,1)$.
\item Estimate propensity scores using logit and do 1-to-1 matching.
\item Iteratively remove the worst match from the dataset, so that the first matched dataset has $2\cdot N_{\mathit{treated}}$ obs., the second dataset has $2\cdot (N_{\mathit{treated}}-1)$ obs., etc., until the last dataset has 2 observations.
\item Calculate $\mathcal{L}_1$ for the data at each iteration.
\item Repeat steps 1-5 fifty times at each value of $k$.  Each iteration is a blue line in the figure below.\vspace{10pt}\\
\end{enumerate}
}
%If the propensity score works as expected, stricter calipers should have better balance.  We find the opposite in reasonably low dimensions.


%%Blow it up to 165% in adobe pdf

\begin{center}
 \large\textbf{Propensity Score Paradox: Simulated MVN Data}\\
 \includegraphics{caliperIncreasingK_16jul2010LARGE.pdf}\\
\begin{footnotesize}                                          
\end{footnotesize}
\end{center}
%\medskip







%\bibliography{suicide}
%\bibliographystyle{pa}

\end{multicols}

\end{document}
