\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
% version; also before submission to
% see how the non-anonymous paper
% would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
%\usepackage{xr} 
%\externaldocument{uai2023-template}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>}: typesets the two mandatory arguments in reverse
% order, joined by the optional separator <sep> (default: `-`), so that
% \swap{a}{b} expands to `b-a` and \swap[/]{a}{b} expands to `b/a`.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Assessing the Impact of Context Inference Error and Partial Observability\\
on RL Methods for Just-In-Time Adaptive Interventions\\
(Supplementary Material)}


% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
%\author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2023 paper}{Jane~J.~von~O'L\'opez}{}}
%\author[1]{Harry~Q.~Bovik}
%\author[1,2]{Further~Coauthor}
%\author[3]{Further~Coauthor}
%\author[1]{Further~Coauthor}
%\author[3]{Further~Coauthor}
%\author[3,1]{Further~Coauthor}
% Add affiliations after the authors
%\affil[1]{%
%    Computer Science Dept.\\
%    Cranberry University\\
%    Pittsburgh, Pennsylvania, USA
%}
%\affil[2]{%
%    Second Affiliation\\
%    Address\\
%    …
%}
%\affil[3]{%
%    Another Affiliation\\
%    Address\\
%    …
%  }
\author[1]{\href{mailto:<karine@cs.umass.edu>?Subject=UAI 2023 paper}{Karine Karine}}
\author[2]{Predrag Klasnja}
\author[3]{Susan A. Murphy}
\author[1]{Benjamin M. Marlin}
\affil[1]{University of Massachusetts Amherst}
\affil[2]{University of Michigan}
\affil[3]{Harvard University}

\begin{document}
  
%\onecolumn %% Turn this off if single column is desired for the supplement

\maketitle
\appendix

\section{Additional Results}
\subsection{Action Selection Analysis for REINFORCE and DQN}

Figures~\ref{fig:REINFORCE-actions} and~\ref{fig:DQN-actions} show the distribution of actions taken by the REINFORCE agent and the DQN agent, respectively.
The top row of plots shows the distribution of actions selected by the agent when given access to context probabilities. The bottom row of plots shows the distribution of actions selected by the agent when given access only to the inferred most likely context. Each plot in each row corresponds to the distribution of actions in a specific range of context inference probabilities. All results are for a context inference error rate of $18\%$. 




%----------------------------------------------------


\begin{figure*}[ht]
    \centering
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_P-H-D-V_sigma06_gamma1_d01_eps04_plot0.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_P-H-D-V_sigma06_gamma1_d01_eps04_plot1.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_P-H-D-V_sigma06_gamma1_d01_eps04_plot2.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_P-H-D-V_sigma06_gamma1_d01_eps04_plot3.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_P-H-D-V_sigma06_gamma1_d01_eps04_plot4.pdf}\\
%
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_L-H-D-V_sigma06_gamma1_d01_eps04_plot0.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_L-H-D-V_sigma06_gamma1_d01_eps04_plot1.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_L-H-D-V_sigma06_gamma1_d01_eps04_plot2.pdf}
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_L-H-D-V_sigma06_gamma1_d01_eps04_plot3.pdf}
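    % NOTE(review): this panel loads an `eps01` file, whereas the other nine panels of this figure load `eps04` files; confirm that the filename is intended.
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/REINFORCE/hist_REINFORCE_L-H-D-V_sigma06_gamma1_d01_eps01_plot4.pdf}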
    \caption{The top row of plots shows
    the distribution of actions selected by REINFORCE when given access to context probabilities. The bottom row of plots shows
    the distribution of actions selected by REINFORCE when given access only to the inferred most likely context. }
        \label{fig:REINFORCE-actions}
\end{figure*}


%----------------------------------------------------

\begin{figure*}[ht]
    \centering
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_P-H-D-V_sigma06_gamma1_d01_eps04_plot0.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_P-H-D-V_sigma06_gamma1_d01_eps04_plot1.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_P-H-D-V_sigma06_gamma1_d01_eps04_plot2.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_P-H-D-V_sigma06_gamma1_d01_eps04_plot3.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_P-H-D-V_sigma06_gamma1_d01_eps04_plot4.pdf}\\
%
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_L-H-D-V_sigma06_gamma1_d01_eps04_plot0.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_L-H-D-V_sigma06_gamma1_d01_eps04_plot1.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_L-H-D-V_sigma06_gamma1_d01_eps04_plot2.pdf}
    \hfill
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_L-H-D-V_sigma06_gamma1_d01_eps04_plot3.pdf}
    \hfill
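    % NOTE(review): this panel loads an `eps01` file, whereas the other nine panels of this figure load `eps04` files; confirm that the filename is intended.
    \includegraphics[width=0.19\linewidth]{pictures/recover_policy/DQN/hist_DQN_L-H-D-V_sigma06_gamma1_d01_eps01_plot4.pdf}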
    \caption{The top row of plots shows
    the distribution of actions selected by DQN when given access to context probabilities. The bottom row of plots shows
    the distribution of actions selected by DQN when given access only to the inferred most likely context. }
        \label{fig:DQN-actions}
\end{figure*}


%----------------------------------------------------

\subsection{Statistical Significance of Performance Differences for Scenarios P-H-D vs.\ L-H-D}

To formally assess the differences between agents with access to $\mathbf{p}_t$ and $l_t$, we perform unpaired t-tests over the ten repetitions for each context inference error rate. A $p$-value below $0.05$ indicates a statistically significant difference. The unpaired t-tests confirm that, up to a context inference error rate of approximately $30\%$, access to $\mathbf{p}_t$ results in statistically significant improvements in total reward compared to access to $l_t$. These results are presented in Tables~\ref{tab:unpaired t-tests0 d=0.1 e=0.4 repeats=10} and~\ref{tab:unpaired t-tests0 d=0.2 e=0.3 repeats=10}.

\subsection{Statistical Significance of Performance Differences Under Partial Observability}

We perform unpaired t-tests to formally contrast the DQN agent with the REINFORCE agent for each context inference error rate, under the partial observability condition. The performance differences are highly statistically significant, with large differences in mean performance across all context inference error rates. These results are presented in Tables~\ref{tab:unpaired t-tests1 d=0.1 e=0.4 repeats=10} and~\ref{tab:unpaired t-tests1 d=0.2 e=0.3 repeats=10}.

%=====================================================

% Unpaired t-tests (P-H-D vs. L-H-D)
% Unpaired t-tests (REINF. vs. DQN)

\begin{table}[b]
  \centering
  \small
  \caption{Unpaired t-tests on performance for scenarios P-H-D vs.\ L-H-D, for different error rates, for both agents, with $\delta_d=0.1$, $\epsilon_d=0.4$.}
  \label{tab:unpaired t-tests0 d=0.1 e=0.4 repeats=10}
  \input{pictures/recover_t_test/t_test_xhdv_d01_ed04}
  \vspace{1cm} 
  \caption{Unpaired t-tests on performance for scenarios P-H-D vs.\ L-H-D, for different error rates, for both agents, with $\delta_d=0.2$, $\epsilon_d=0.3$.}
  \label{tab:unpaired t-tests0 d=0.2 e=0.3 repeats=10}
  \input{pictures/recover_t_test/t_test_xhdv_d02_ed03}  
\end{table}

\begin{table}[b]
  \centering
  \small
  \caption{Unpaired t-tests on performance for scenarios REINFORCE L-T vs.\ DQN L-T, and scenarios REINFORCE P-T vs.\ DQN P-T, with $\delta_d=0.1$, $\epsilon_d=0.4$.}
  \label{tab:unpaired t-tests1 d=0.1 e=0.4 repeats=10}
  \input{pictures/recover_t_test/t_test_xtfnv_d01_ed04}  
  \vspace{1cm}
  \caption{Unpaired t-tests on performance for scenarios REINFORCE L-T vs.\ DQN L-T, and scenarios REINFORCE P-T vs.\ DQN P-T, with $\delta_d=0.2$, $\epsilon_d=0.3$.}
  \label{tab:unpaired t-tests1 d=0.2 e=0.3 repeats=10}
  \input{pictures/recover_t_test/t_test_xtfnv_d02_ed03}
\end{table}


%=====================================================

%----------------------------------------------------

\subsection{Performance as a Function of Disengagement Dynamics Parameters}

For both agents, we study how the performance of learned policies varies as a function of the disengagement increment parameter $\epsilon_d$ and disengagement decay parameter $\delta_d$. The presented results correspond to various values of $\sigma$ with habituation and disengagement observed. The results for REINFORCE are in Figure~\ref{fig:heatmaps REINFORCE}, and the results for DQN are in Figure~\ref{fig:heatmaps DQN}. As we can see, these results show that the use of context inference probabilities improves on using the most likely inferred context over a wide range of settings of these parameters. We note that the performance difference tends to be larger in cases that lead to a greater chance of disengagement events occurring. This corresponds to larger values of the disengagement risk increment parameter $\epsilon_d$ and smaller values of the disengagement risk decay parameter $\delta_d$. For context inference error rates larger than $41\%$ ($\sigma=2$), the contrast is less apparent.

% heatmap for REINFORCE

\begin{figure*}[ht]
    \centering
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_sigma04_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_L-H-D-V_sigma04_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_L-H-D-V_sigma04_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_sigma06_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_L-H-D-V_sigma06_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_L-H-D-V_sigma06_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_sigma08_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_L-H-D-V_sigma08_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_L-H-D-V_sigma08_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_sigma1_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_L-H-D-V_sigma1_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_L-H-D-V_sigma1_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_sigma2_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_L-H-D-V_sigma2_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_REINFORCE_P-H-D-V_L-H-D-V_sigma2_repeats10.pdf}
    \caption{Performance for REINFORCE, for $\sigma=0.4, 0.6, 0.8, 1, 2$, with $10$ repeats.}
    \label{fig:heatmaps REINFORCE}
\end{figure*}

\newpage

%-------------------------------------------------------------------------
% heatmap for DQN

\begin{figure*}[ht]
    \centering
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_sigma04_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_L-H-D-V_sigma04_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_L-H-D-V_sigma04_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_sigma06_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_L-H-D-V_sigma06_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_L-H-D-V_sigma06_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_sigma08_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_L-H-D-V_sigma08_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_L-H-D-V_sigma08_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_sigma1_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_L-H-D-V_sigma1_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_L-H-D-V_sigma1_repeats10.pdf}
    \hspace{2cm}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_sigma2_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_L-H-D-V_sigma2_repeats10.pdf}
    \includegraphics[width=0.22\linewidth]{pictures/recover_heatmap/heatmap_DQN_P-H-D-V_L-H-D-V_sigma2_repeats10.pdf}
    \caption{Performance for DQN, for $\sigma=0.4, 0.6, 0.8, 1, 2$, with $10$ repeats.}
    \label{fig:heatmaps DQN}
\end{figure*}



%\bibliography{uai2023-template}

\end{document}
