\begin{thebibliography}{33}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
  \providecommand{\doi}[1]{doi: #1}\else
  \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi

\bibitem[Abbasi-Yadkori et~al.(2020)Abbasi-Yadkori, Pacchiano, and
  Phan]{abbasi2020regret}
Yasin Abbasi-Yadkori, Aldo Pacchiano, and My~Phan.
\newblock Regret balancing for bandit and RL model selection.
\newblock \emph{arXiv preprint arXiv:2006.05491}, 2020.

\bibitem[Agarwal et~al.(2017)Agarwal, Luo, Neyshabur, and
  Schapire]{agarwal2017corralling}
Alekh Agarwal, Haipeng Luo, Behnam Neyshabur, and Robert~E Schapire.
\newblock Corralling a band of bandit algorithms.
\newblock In \emph{Conference on Learning Theory}, pages 12--38. PMLR, 2017.

\bibitem[Auer et~al.(2002)Auer, Cesa-Bianchi, Freund, and
  Schapire]{auer2002nonstochastic}
Peter Auer, Nicolo Cesa-Bianchi, Yoav Freund, and Robert~E Schapire.
\newblock The nonstochastic multiarmed bandit problem.
\newblock \emph{SIAM Journal on Computing}, 32\penalty0 (1):\penalty0 48--77,
  2002.

\bibitem[Ayoub et~al.(2020)Ayoub, Jia, Szepesvari, Wang, and
  Yang]{ayoub2020model}
Alex Ayoub, Zeyu Jia, Csaba Szepesvari, Mengdi Wang, and Lin Yang.
\newblock Model-based reinforcement learning with value-targeted regression.
\newblock In \emph{International Conference on Machine Learning}, pages
  463--474. PMLR, 2020.

\bibitem[Azuma(1967)]{azuma1967weighted}
Kazuoki Azuma.
\newblock Weighted sums of certain dependent random variables.
\newblock \emph{Tohoku Mathematical Journal, Second Series}, 19\penalty0
  (3):\penalty0 357--367, 1967.

\bibitem[Bartlett et~al.(2005)Bartlett, Bousquet, Mendelson,
  et~al.]{bartlett2005local}
Peter~L Bartlett, Olivier Bousquet, Shahar Mendelson, et~al.
\newblock Local Rademacher complexities.
\newblock \emph{The Annals of Statistics}, 33\penalty0 (4):\penalty0
  1497--1537, 2005.

\bibitem[Cai et~al.(2020)Cai, Yang, Jin, and Wang]{cai2020provably}
Qi~Cai, Zhuoran Yang, Chi Jin, and Zhaoran Wang.
\newblock Provably efficient exploration in policy optimization.
\newblock In \emph{International Conference on Machine Learning}, pages
  1283--1294. PMLR, 2020.

\bibitem[Chatterji et~al.(2020)Chatterji, Muthukumar, and
  Bartlett]{chatterji2020osom}
Niladri Chatterji, Vidya Muthukumar, and Peter Bartlett.
\newblock OSOM: A simultaneously optimal algorithm for multi-armed and linear
  contextual bandits.
\newblock In \emph{International Conference on Artificial Intelligence and
  Statistics}, pages 1844--1854. PMLR, 2020.

\bibitem[Foster et~al.(2019)Foster, Krishnamurthy, and Luo]{foster2019model}
Dylan Foster, Akshay Krishnamurthy, and Haipeng Luo.
\newblock Model selection for contextual bandits.
\newblock \emph{Advances in Neural Information Processing Systems}, 2019.

\bibitem[Foster et~al.(2020)Foster, Krishnamurthy, and Luo]{foster2020open}
Dylan~J Foster, Akshay Krishnamurthy, and Haipeng Luo.
\newblock Open problem: Model selection for contextual bandits.
\newblock In \emph{Conference on Learning Theory}, pages 3842--3846. PMLR,
  2020.

\bibitem[Foster et~al.(2021)Foster, Krishnamurthy, Simchi-Levi, and
  Xu]{foster2021offline}
Dylan~J Foster, Akshay Krishnamurthy, David Simchi-Levi, and Yunzong Xu.
\newblock Offline reinforcement learning: Fundamental barriers for value
  function approximation.
\newblock \emph{arXiv preprint arXiv:2111.10919}, 2021.

\bibitem[Ghosh et~al.(2021)Ghosh, Sankararaman, and
  Ramchandran]{ghosh2021problem}
Avishek Ghosh, Abishek Sankararaman, and Kannan Ramchandran.
\newblock Problem-complexity adaptive model selection for stochastic linear
  bandits.
\newblock In \emph{International Conference on Artificial Intelligence and
  Statistics}, pages 1396--1404. PMLR, 2021.

\bibitem[He et~al.(2021)He, Zhou, and Gu]{he2020logarithmic}
Jiafan He, Dongruo Zhou, and Quanquan Gu.
\newblock Logarithmic regret for reinforcement learning with linear function
  approximation.
\newblock In \emph{International Conference on Machine Learning}. PMLR, 2021.

\bibitem[Jia et~al.(2020)Jia, Yang, Szepesvari, and Wang]{jia2020model}
Zeyu Jia, Lin Yang, Csaba Szepesvari, and Mengdi Wang.
\newblock Model-based reinforcement learning with value-targeted regression.
\newblock In \emph{Learning for Dynamics and Control}, pages 666--686. PMLR,
  2020.

\bibitem[Jiang et~al.(2017)Jiang, Krishnamurthy, Agarwal, Langford, and
  Schapire]{jiang2017contextual}
Nan Jiang, Akshay Krishnamurthy, Alekh Agarwal, John Langford, and Robert~E
  Schapire.
\newblock Contextual decision processes with low Bellman rank are
  PAC-learnable.
\newblock In \emph{International Conference on Machine Learning}, pages
  1704--1713. PMLR, 2017.

\bibitem[Jin et~al.(2020)Jin, Yang, Wang, and Jordan]{jin2020provably}
Chi Jin, Zhuoran Yang, Zhaoran Wang, and Michael~I Jordan.
\newblock Provably efficient reinforcement learning with linear function
  approximation.
\newblock In \emph{Conference on Learning Theory}, pages 2137--2143. PMLR,
  2020.

\bibitem[Jin et~al.(2021)Jin, Yang, and Wang]{jin2021pessimism}
Ying Jin, Zhuoran Yang, and Zhaoran Wang.
\newblock Is pessimism provably efficient for offline RL?
\newblock In \emph{International Conference on Machine Learning}, pages
  5084--5096. PMLR, 2021.

\bibitem[Min et~al.(2021)Min, Wang, Zhou, and Gu]{min2021variance}
Yifei Min, Tianhao Wang, Dongruo Zhou, and Quanquan Gu.
\newblock Variance-aware off-policy evaluation with linear function
  approximation.
\newblock \emph{Advances in Neural Information Processing Systems}, 34, 2021.

\bibitem[Modi et~al.(2020)Modi, Jiang, Tewari, and Singh]{modi2020sample}
Aditya Modi, Nan Jiang, Ambuj Tewari, and Satinder Singh.
\newblock Sample complexity of reinforcement learning using linearly combined
  model ensembles.
\newblock In \emph{International Conference on Artificial Intelligence and
  Statistics}, pages 2010--2020. PMLR, 2020.

\bibitem[Maillard and Munos(2011)]{odalric2011adaptive}
Odalric-Ambrym Maillard and R{\'e}mi Munos.
\newblock Adaptive bandits: Towards the best history-dependent strategy.
\newblock In \emph{Proceedings of the Fourteenth International Conference on
  Artificial Intelligence and Statistics}, pages 570--578. JMLR Workshop and
  Conference Proceedings, 2011.

\bibitem[Pacchiano et~al.(2020{\natexlab{a}})Pacchiano, Dann, Gentile, and
  Bartlett]{pacchiano2020regret}
Aldo Pacchiano, Christoph Dann, Claudio Gentile, and Peter Bartlett.
\newblock Regret bound balancing and elimination for model selection in bandits
  and RL.
\newblock \emph{arXiv preprint arXiv:2012.13045}, 2020{\natexlab{a}}.

\bibitem[Pacchiano et~al.(2020{\natexlab{b}})Pacchiano, Phan, Abbasi~Yadkori,
  Rao, Zimmert, Lattimore, and Szepesvari]{pacchiano2020model}
Aldo Pacchiano, My~Phan, Yasin Abbasi~Yadkori, Anup Rao, Julian Zimmert, Tor
  Lattimore, and Csaba Szepesvari.
\newblock Model selection in contextual stochastic bandit problems.
\newblock \emph{Advances in Neural Information Processing Systems}, 33,
  2020{\natexlab{b}}.

\bibitem[Papini et~al.(2021)Papini, Tirinzoni, Restelli, Lazaric, and
  Pirotta]{papini2021leveraging}
Matteo Papini, Andrea Tirinzoni, Marcello Restelli, Alessandro Lazaric, and
  Matteo Pirotta.
\newblock Leveraging good representations in linear contextual bandits.
\newblock In \emph{International Conference on Machine Learning}. PMLR, 2021.

\bibitem[Tropp(2012)]{tropp2012user}
Joel~A Tropp.
\newblock User-friendly tail bounds for sums of random matrices.
\newblock \emph{Foundations of Computational Mathematics}, 12\penalty0
  (4):\penalty0 389--434, 2012.

\bibitem[Uehara and Sun(2021)]{uehara2021pessimistic}
Masatoshi Uehara and Wen Sun.
\newblock Pessimistic model-based offline RL: PAC bounds and posterior sampling
  under partial coverage.
\newblock \emph{arXiv preprint arXiv:2107.06226}, 2021.

\bibitem[Xie et~al.(2021)Xie, Cheng, Jiang, Mineiro, and
  Agarwal]{xie2021bellman}
Tengyang Xie, Ching-An Cheng, Nan Jiang, Paul Mineiro, and Alekh Agarwal.
\newblock Bellman-consistent pessimism for offline reinforcement learning.
\newblock \emph{Advances in Neural Information Processing Systems}, 34, 2021.

\bibitem[Yang and Wang(2019)]{yang2019sample}
Lin Yang and Mengdi Wang.
\newblock Sample-optimal parametric Q-learning using linearly additive
  features.
\newblock In \emph{International Conference on Machine Learning}, pages
  6995--7004. PMLR, 2019.

\bibitem[Yang and Wang(2020)]{yang2020reinforcement}
Lin Yang and Mengdi Wang.
\newblock Reinforcement learning in feature space: Matrix bandit, kernels, and
  regret bound.
\newblock In \emph{International Conference on Machine Learning}, pages
  10746--10756. PMLR, 2020.

\bibitem[Yin et~al.(2021)Yin, Bai, and Wang]{yin2021near}
Ming Yin, Yu~Bai, and Yu-Xiang Wang.
\newblock Near-optimal offline reinforcement learning via double variance
  reduction.
\newblock \emph{Advances in Neural Information Processing Systems}, 34, 2021.

\bibitem[Yin et~al.(2022)Yin, Duan, Wang, and Wang]{yin2022nearoptimal}
Ming Yin, Yaqi Duan, Mengdi Wang, and Yu-Xiang Wang.
\newblock Near-optimal offline reinforcement learning with linear
  representation: Leveraging variance information with pessimism.
\newblock In \emph{International Conference on Learning Representations}, 2022.

\bibitem[Zanette et~al.(2020)Zanette, Lazaric, Kochenderfer, and
  Brunskill]{zanette2020learning}
Andrea Zanette, Alessandro Lazaric, Mykel Kochenderfer, and Emma Brunskill.
\newblock Learning near optimal policies with low inherent Bellman error.
\newblock In \emph{International Conference on Machine Learning}, pages
  10978--10989. PMLR, 2020.

\bibitem[Zhou et~al.(2021{\natexlab{a}})Zhou, Gu, and
  Szepesvari]{zhou2020nearly}
Dongruo Zhou, Quanquan Gu, and Csaba Szepesvari.
\newblock Nearly minimax optimal reinforcement learning for linear mixture
  Markov decision processes.
\newblock In \emph{Conference on Learning Theory}. PMLR, 2021{\natexlab{a}}.

\bibitem[Zhou et~al.(2021{\natexlab{b}})Zhou, He, and Gu]{zhou2020provably}
Dongruo Zhou, Jiafan He, and Quanquan Gu.
\newblock Provably efficient reinforcement learning for discounted MDPs with
  feature mapping.
\newblock In \emph{International Conference on Machine Learning}. PMLR,
  2021{\natexlab{b}}.

\end{thebibliography}
