\documentclass[accepted]{uai2025} % accepted (non-anonymous) version
%\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools}
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage{amsmath,amsthm}
\usepackage{thmtools}
\usepackage{thm-restate}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{mathrsfs}
\usepackage{amssymb}
\usepackage{multirow}
\usepackage{wrapfig}
\usepackage{dsfont}
\usepackage{comment}
\usepackage{cleveref}

% Optional math commands from https://github.com/goodfeli/dlbook_notation.
\input{math_commands.tex}

\title{Causal Eligibility Traces for Confounding Robust Off-Policy Evaluation}

% The standard author block has changed for UAI 2025 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{Junzhe Zhang}
\author[2]{Elias Bareinboim}

\affil[1]{%
    Department of Electrical Engineering and Computer Science\\
    Syracuse University
}

\affil[2]{%
    Department of Computer Science\\
    Columbia University
}

\begin{document}
\maketitle

\input{section0}
\input{section1}
\input{section2}
\input{section3}
\input{section4}
\input{section5}
\begin{thebibliography}{47}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
  \providecommand{\doi}[1]{doi: #1}\else
  \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi

\bibitem[Bajari et~al.(2007)Bajari, Benkard, and Levin]{bajari2007estimating}
Patrick Bajari, C~Lanier Benkard, and Jonathan Levin.
\newblock Estimating dynamic models of imperfect competition.
\newblock \emph{Econometrica}, 75\penalty0 (5):\penalty0 1331--1370, 2007.

\bibitem[Balke and Pearl(1997)]{balke:pea97}
A.~Balke and J.~Pearl.
\newblock Bounds on treatment effects from studies with imperfect compliance.
\newblock \emph{Journal of the American Statistical Association}, 92\penalty0
  (439):\penalty0 1172--1176, September 1997.

\bibitem[Bareinboim et~al.(2022)Bareinboim, Correa, Ibeling, and
  Icard]{bareinboim2022pearl}
Elias Bareinboim, Juan~D Correa, Duligur Ibeling, and Thomas Icard.
\newblock On Pearl's hierarchy and the foundations of causal inference.
\newblock In \emph{Probabilistic and Causal Inference: The Works of Judea
  Pearl}, pages 507--556. 2022.

\bibitem[Bellman(1966)]{bellman1966dynamic}
Richard Bellman.
\newblock Dynamic programming.
\newblock \emph{Science}, 153\penalty0 (3731):\penalty0 34--37, 1966.

\bibitem[Berry and Compiani(2023)]{berry2023instrumental}
Steven~T Berry and Giovanni Compiani.
\newblock An instrumental variable approach to dynamic models.
\newblock \emph{The Review of Economic Studies}, 90\penalty0 (4):\penalty0
  1724--1758, 2023.

\bibitem[Bruns-Smith and Zhou(2023)]{bruns2023robust}
David Bruns-Smith and Angela Zhou.
\newblock Robust fitted-Q-evaluation and iteration under sequentially exogenous
  unobserved confounders.
\newblock \emph{arXiv preprint arXiv:2302.00662}, 2023.

\bibitem[Bugni(2010)]{bugni2010bootstrap}
Federico~A Bugni.
\newblock Bootstrap inference in partially identified models defined by moment
  inequalities: Coverage of the identified set.
\newblock \emph{Econometrica}, 78\penalty0 (2):\penalty0 735--753, 2010.

\bibitem[Dickstein and Morales(2018)]{dickstein2018exporters}
Michael~J Dickstein and Eduardo Morales.
\newblock What do exporters know?
\newblock \emph{The Quarterly Journal of Economics}, 133\penalty0 (4):\penalty0
  1753--1801, 2018.

\bibitem[Guo et~al.(2022)Guo, Cai, Zhang, Yang, and Wang]{guo2022provably}
Hongyi Guo, Qi~Cai, Yufeng Zhang, Zhuoran Yang, and Zhaoran Wang.
\newblock Provably efficient offline reinforcement learning for partially
  observable Markov decision processes.
\newblock In \emph{International Conference on Machine Learning}, pages
  8016--8038. PMLR, 2022.

\bibitem[Imbens and Rubin(1997)]{imbens1997bayesian}
Guido~W Imbens and Donald~B Rubin.
\newblock Bayesian inference for causal effects in randomized experiments with
  noncompliance.
\newblock \emph{The Annals of Statistics}, pages 305--327, 1997.

\bibitem[Jaakkola et~al.(1994)Jaakkola, Singh, and
  Jordan]{jaakkola1994reinforcement}
Tommi Jaakkola, Satinder Singh, and Michael Jordan.
\newblock Reinforcement learning algorithm for partially observable Markov
  decision problems.
\newblock \emph{Advances in Neural Information Processing Systems}, 7, 1994.

\bibitem[Jiang and Li(2016)]{jiang2015doubly}
Nan Jiang and Lihong Li.
\newblock Doubly robust off-policy value evaluation for reinforcement learning.
\newblock In Maria~Florina Balcan and Kilian~Q. Weinberger, editors,
  \emph{Proceedings of The 33rd International Conference on Machine Learning},
  volume~48 of \emph{Proceedings of Machine Learning Research}, pages 652--661,
  New York, New York, USA, 20--22 Jun 2016. PMLR.
\newblock URL \url{http://proceedings.mlr.press/v48/jiang16.html}.

\bibitem[Joshi et~al.(2024)Joshi, Zhang, and Bareinboim]{joshi2024towards}
Shalmali Joshi, Junzhe Zhang, and Elias Bareinboim.
\newblock Towards safe policy learning under partial identifiability: A causal
  approach.
\newblock In \emph{Proceedings of the AAAI Conference on Artificial
  Intelligence}, volume~38, pages 13004--13012, 2024.

\bibitem[Kallus and Zhou(2018)]{kallus2018confounding}
Nathan Kallus and Angela Zhou.
\newblock Confounding-robust policy improvement.
\newblock In \emph{Proceedings of the 32nd International Conference on Neural
  Information Processing Systems}, pages 9289--9299, 2018.

\bibitem[Kallus and Zhou(2020)]{kallus2020confounding}
Nathan Kallus and Angela Zhou.
\newblock Confounding-robust policy evaluation in infinite-horizon
  reinforcement learning.
\newblock In H.~Larochelle, M.~Ranzato, R.~Hadsell, M.~F. Balcan, and H.~Lin,
  editors, \emph{Advances in Neural Information Processing Systems}, volume~33,
  pages 22293--22304. Curran Associates, Inc., 2020.

\bibitem[Kausik et~al.(2024)Kausik, Lu, Tan, Makar, Wang, and
  Tewari]{kausik2024offline}
Chinmaya Kausik, Yangyi Lu, Kevin Tan, Maggie Makar, Yixin Wang, and Ambuj
  Tewari.
\newblock Offline policy evaluation and optimization under confounding.
\newblock In \emph{International Conference on Artificial Intelligence and
  Statistics}, pages 1459--1467. PMLR, 2024.

\bibitem[Khan et~al.(2023)Khan, Saveski, and Ugander]{khan2023off}
Samir Khan, Martin Saveski, and Johan Ugander.
\newblock Off-policy evaluation beyond overlap: partial identification through
  smoothness.
\newblock \emph{arXiv preprint arXiv:2305.11812}, 2023.

\bibitem[Kumor et~al.(2021)Kumor, Zhang, and Bareinboim]{kumor2021causal}
Daniel Kumor, Junzhe Zhang, and Elias Bareinboim.
\newblock Sequential causal imitation learning with unobserved confounders.
\newblock \emph{Advances in Neural Information Processing Systems}, 2021.

\bibitem[Leike et~al.(2017)Leike, Martic, Krakovna, Ortega, Everitt, Lefrancq,
  Orseau, and Legg]{leike2017ai}
Jan Leike, Miljan Martic, Victoria Krakovna, Pedro~A Ortega, Tom Everitt,
  Andrew Lefrancq, Laurent Orseau, and Shane Legg.
\newblock AI safety gridworlds.
\newblock \emph{arXiv preprint arXiv:1711.09883}, 2017.

\bibitem[Manski(1990)]{manski:90}
C.F. Manski.
\newblock Nonparametric bounds on treatment effects.
\newblock \emph{American Economic Review, Papers and Proceedings}, 80:\penalty0
  319--323, 1990.

\bibitem[Moon and Schorfheide(2012)]{moon2012bayesian}
Hyungsik~Roger Moon and Frank Schorfheide.
\newblock Bayesian and frequentist inference in partially identified models.
\newblock \emph{Econometrica}, 80\penalty0 (2):\penalty0 755--782, 2012.

\bibitem[Morales et~al.(2019)Morales, Sheu, and Zahler]{morales2019extended}
Eduardo Morales, Gloria Sheu, and Andr{\'e}s Zahler.
\newblock Extended gravity.
\newblock \emph{The Review of Economic Studies}, 86\penalty0 (6):\penalty0
  2668--2712, 2019.

\bibitem[Munos et~al.(2016)Munos, Stepleton, Harutyunyan, and
  Bellemare]{munos2016safe}
R{\'e}mi Munos, Tom Stepleton, Anna Harutyunyan, and Marc Bellemare.
\newblock Safe and efficient off-policy reinforcement learning.
\newblock In \emph{Advances in Neural Information Processing Systems}, pages
  1054--1062, 2016.

\bibitem[Namkoong et~al.(2020)Namkoong, Keramati, Yadlowsky, and
  Brunskill]{namkoong2020off}
Hongseok Namkoong, Ramtin Keramati, Steve Yadlowsky, and Emma Brunskill.
\newblock Off-policy policy evaluation for sequential decisions under
  unobserved confounding.
\newblock \emph{Advances in Neural Information Processing Systems},
  33:\penalty0 18819--18831, 2020.

\bibitem[Norets and Tang(2014)]{norets2014semiparametric}
Andriy Norets and Xun Tang.
\newblock Semiparametric inference in dynamic binary choice models.
\newblock \emph{Review of Economic Studies}, 81\penalty0 (3):\penalty0
  1229--1262, 2014.

\bibitem[Pearl and Robins(1995)]{pearl:rob95}
J.~Pearl and J.M. Robins.
\newblock Probabilistic evaluation of sequential plans from causal models with
  hidden variables.
\newblock In P.~Besnard and S.~Hanks, editors, \emph{Proceedings of the
  Eleventh Conference on Uncertainty in Artificial Intelligence (UAI 1995)},
  pages 444--453. Morgan Kaufmann, San Francisco, 1995.

\bibitem[Pearl(2000)]{pearl:2k}
Judea Pearl.
\newblock \emph{Causality: Models, Reasoning, and Inference}.
\newblock Cambridge University Press, New York, 2000.

\bibitem[Perkovi{\'c} et~al.(2015)Perkovi{\'c}, Textor, Kalisch, and
  Maathuis]{perkovic:15}
Emilija Perkovi{\'c}, Johannes Textor, Markus Kalisch, and Marloes~H Maathuis.
\newblock A complete generalized adjustment criterion.
\newblock \emph{arXiv preprint arXiv:1507.01524}, 2015.

\bibitem[Poirier(1998)]{poirier1998revising}
Dale~J Poirier.
\newblock Revising beliefs in nonidentified models.
\newblock \emph{Econometric Theory}, 14\penalty0 (4):\penalty0 483--509, 1998.

\bibitem[Precup et~al.(2000)Precup, Sutton, and Singh]{precup2000eligibility}
Doina Precup, Richard~S. Sutton, and Satinder~P. Singh.
\newblock Eligibility traces for off-policy policy evaluation.
\newblock In \emph{Proceedings of the Seventeenth International Conference on
  Machine Learning}, pages 759--766, 2000.

\bibitem[Puterman(1994)]{puterman1994markov}
Martin~L Puterman.
\newblock \emph{Markov Decision Processes: Discrete Stochastic Dynamic
  Programming}.
\newblock John Wiley \& Sons, Inc., 1994.

\bibitem[Robbins(1985)]{robbins1985some}
Herbert Robbins.
\newblock Some aspects of the sequential design of experiments.
\newblock In \emph{Herbert Robbins Selected Papers}, pages 169--177. Springer,
  1985.

\bibitem[Romano and Shaikh(2008)]{romano2008inference}
Joseph~P Romano and Azeem~M Shaikh.
\newblock Inference for identifiable parameters in partially identified
  econometric models.
\newblock \emph{Journal of Statistical Planning and Inference}, 138\penalty0
  (9):\penalty0 2786--2807, 2008.

\bibitem[Ruan et~al.(2024)Ruan, Zhang, Di, and Bareinboim]{ruan2024causal}
Kangrui Ruan, Junzhe Zhang, Xuan Di, and Elias Bareinboim.
\newblock Causal imitation for Markov decision processes: A partial
  identification approach.
\newblock \emph{Advances in Neural Information Processing Systems}, 2024.

\bibitem[Shpitser et~al.(2010)Shpitser, VanderWeele, and
  Robins]{shpitser:etal10}
I.\ Shpitser, T.J.\ VanderWeele, and J.M. Robins.
\newblock On the validity of covariate adjustment for estimating causal
  effects.
\newblock In \emph{Proceedings of the Twenty-Sixth Conference on Uncertainty in
  Artificial Intelligence}, pages 527--536. AUAI, Corvallis, OR, 2010.

\bibitem[Stoye(2009)]{stoye2009more}
J{\"o}rg Stoye.
\newblock More on confidence intervals for partially identified parameters.
\newblock \emph{Econometrica}, 77\penalty0 (4):\penalty0 1299--1315, 2009.

\bibitem[Sutton(1988)]{sutton1988learning}
Richard~S Sutton.
\newblock Learning to predict by the methods of temporal differences.
\newblock \emph{Machine Learning}, 3:\penalty0 9--44, 1988.

\bibitem[Sutton and Barto(1998)]{sutton1998reinforcement}
Richard~S. Sutton and Andrew~G. Barto.
\newblock \emph{Reinforcement learning: An introduction}.
\newblock MIT Press, 1998.

\bibitem[Swaminathan and Joachims(2015)]{swaminathan2015counterfactual}
Adith Swaminathan and Thorsten Joachims.
\newblock Counterfactual risk minimization: Learning from logged bandit
  feedback.
\newblock In \emph{International Conference on Machine Learning}, pages
  814--823, 2015.

\bibitem[Todem et~al.(2010)Todem, Fine, and Peng]{todem2010global}
D~Todem, J~Fine, and L~Peng.
\newblock A global sensitivity test for evaluating statistical hypotheses with
  nonidentifiable models.
\newblock \emph{Biometrics}, 66\penalty0 (2):\penalty0 558--566, 2010.

\bibitem[Watkins and Dayan(1992)]{watkins1992q}
Christopher~JCH Watkins and Peter Dayan.
\newblock Q-learning.
\newblock \emph{Machine Learning}, 8\penalty0 (3--4):\penalty0 279--292, 1992.

\bibitem[Watkins(1989)]{watkins1989learning}
Christopher John Cornish~Hellaby Watkins.
\newblock \emph{Learning from delayed rewards}.
\newblock PhD thesis, University of Cambridge, England, 1989.

\bibitem[Zhang and Bareinboim(2024)]{zhang2024eligibility}
J.~Zhang and E.~Bareinboim.
\newblock Eligibility traces for confounding robust off-policy evaluation.
\newblock Technical Report R-105, Causal Artificial Intelligence Lab, Columbia
  University, May 2024.

\bibitem[Zhang and Bareinboim(2019)]{zhang2019near}
Junzhe Zhang and Elias Bareinboim.
\newblock Near-optimal reinforcement learning in dynamic treatment regimes.
\newblock In \emph{Advances in Neural Information Processing Systems}, pages
  13401--13411, 2019.

\bibitem[Zhang and Bareinboim(2021)]{zhang2021bounding}
Junzhe Zhang and Elias Bareinboim.
\newblock Bounding causal effects on continuous outcomes.
\newblock In \emph{Proceedings of the 35th AAAI Conference on Artificial
  Intelligence}, 2021.

\bibitem[Zhang et~al.(2020)Zhang, Kumor, and Bareinboim]{zhang2020causal}
Junzhe Zhang, Daniel Kumor, and Elias Bareinboim.
\newblock Causal imitation learning with unobserved confounders.
\newblock \emph{Advances in Neural Information Processing Systems}, 33, 2020.

\bibitem[Zhang et~al.(2022)Zhang, Tian, and Bareinboim]{zhang2022partial}
Junzhe Zhang, Jin Tian, and Elias Bareinboim.
\newblock Partial counterfactual identification from observational and
  experimental data.
\newblock In \emph{International Conference on Machine Learning}, pages
  26548--26558. PMLR, 2022.

\end{thebibliography}
\end{document}