In this section, we present our theoretical results on Missing at Random (MAR) and Missing Not at Random (MNAR) environments for both Missing Outcome and Missing Mediator cases.

\subsection{Missing at Random (MAR)}
As discussed earlier, the identification of \( \mu_a = \mathbb{E}[Y \mid a] \) is given by:

\[
\mu_a = \sum\limits_{m \in \mathbb{M}} \mathbb{P}(M = m \mid a, O^M = 1) \mathbb{E}[Y \mid M = m, a, O^Y = 1, O^M = 1].
\]

Using this identification, we will prove the following theorem. We define \( p_{m, a} = \mathbb{P}(M = m \mid a) \), \( \gamma_{m, a} = \mathbb{P}(O^Y = 1 \mid M = m, a) \), and \( \lambda_a = \mathbb{P}(O^M = 1 \mid a) \). Our algorithm is identical to Algorithm~\ref{alg:mar_algorithm2}, except that when \( M \) is missing we do not update any estimate.

\begin{restatable}{theorem}{theoMmismarsupper} \label{theo:M_miss_upper}
(Regret bound for MAR with missing mediator and outcome) Under Assumption~\ref{ass:mar_my_assumption},
for every \( \alpha > 1 \), the following regret bound holds for sufficiently large \( T \):
\[
\mathbb{E}[R_T] = O\left( \sqrt{\alpha T \log(T) n S} \right),
\]
where \( P_a = \sum_{m \in \mathbb{M}} \frac{p_{m,a}}{\gamma_{m,a} \lambda_a} \) and \( S \coloneqq \frac{1}{\vert \mathcal{A}\vert} \sum\limits_{a\in\mathcal{A}} P_a \).
\end{restatable}

\begin{proof}
    The proof follows a similar approach to the proof of Theorem \ref{theo:mar_upper_2}. Using the same reasoning, we have (where \( T_{m, a, o_Y} \) is the number of times \( M = m \) and the reward are observed when pulling arm \( a \)):
    \[
    \left| \sum\limits_{m \in \mathbb{M}} \hat{p}_{m, a} \hat{\mu}_{m, a} - \mu_a \right| \leq 8\sqrt{\frac{\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{\hat{p}_{m, a}^2}{T_{m, a, o_Y}}}.
    \]
    Similarly, we also have the following inequality (where \( T_{a, o_M} \) is the number of times the mediator \( M \) is observed when pulling arm \( a \)):
    \[
    \left| p_{m,a} - \hat{p}_{m, a} \right| \leq \sqrt{\frac{\alpha \log(T)}{2T_{a, o_M}}}.
    \]
    Additionally, we have:
    \[
    \left| \frac{T_{a, o_M}}{T_a} - \lambda_a \right| \leq \sqrt{\frac{\alpha \log(T)}{2T_a}}.
    \]
    Under these events, for sufficiently large \( T \), we have \( T_a \geq \log(T)^2 \) and \( \frac{p_{m, a}}{2} \leq \hat{p}_{m, a} \leq 2 p_{m, a} \). Following similar steps, we get:
    \[
    T_{m, a, o_Y} \geq \frac{p_{m, a} \gamma_{m, a} \lambda_a T_a}{4}.
    \]
    Therefore, using the same definition of \( \epsilon_a \), we obtain:
    \[
    \epsilon_a \leq 8\sqrt{\frac{8\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{p_{m, a}^2}{p_{m, a} \gamma_{m, a} \lambda_a T_a}} = 8\sqrt{\frac{8\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{p_{m, a}}{\gamma_{m, a} \lambda_a T_a}}.
    \]
    Finally, following the same steps as in previous proofs, we conclude:
    \[
    \mathbb{E}[R_T] = O\left( \sqrt{\alpha T \log(T) n S} \right).
    \]

Note: By ``sufficiently large'' \( T \), we mean a \( T \) that is large enough to satisfy \( \log(T) > C \) for some constant \( C \).
\end{proof}

\subsection{Missing Not at Random (MNAR)}
In this section, we use the identification formula discussed earlier to develop an algorithm and establish an upper bound for this environment. Assume that  
\( \mathbb{P}(m \mid a) = p_{m, a} \), \( \mathbb{P}(O^M = 1 \mid m, a) = \lambda_{m, a} \), \( \mathbb{P}(O^Y = 1 \mid m, a) = \gamma_{m, a} \). Also define \( \lambda_a = \min\limits_{m} \lambda_{m, a} \). We assume a similar condition to Assumption~\ref{ass:mnar_K_assumption}, but for a different matrix. Let \( \Theta_a = [\mathbb{P}(m, y, O^M = 1, O^Y = 1 \mid a)]_{K \times L} \):

\begin{assumption}[Bounded condition number]\label{ass:missing_M_mnar_K_assumption}
For each arm \( a \in \mathcal{A} \), the condition number of the matrix \( \Theta_a \) is bounded by:
\[
    \kappa(\Theta_a) \leq C_a,
\]
where \( \kappa(\Theta_a) \) denotes the condition number of \( \Theta_a \) with respect to the $\infty$-norm, defined as 
\[
    \kappa(\Theta_a) = \lVert \Theta_a \rVert_\infty \lVert \Theta_a^{\dagger} \rVert_\infty,
\] with $\Theta_a^{\dagger}$ being the pseudo-inverse of \( \Theta_a \).
\end{assumption}

In our algorithm, we use the given identification formula and the following upper confidence bound:
\[ 
\text{UCB}(a) = \hat{\mu}_a + 2 \sqrt{\frac{\alpha \log(T)}{2T_a}} \left( 8 \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right) + \sqrt{\frac{\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{4 \hat{p}_{m, a}^2}{T_{a, m, o_Y}}}.
\]

We will prove the following theorem.
\begin{restatable}{theorem}{theoMmismnarsupper} \label{theo:M_miss_mnar_upper}
(Regret bound for MNAR with missing mediator and outcome) Under Assumptions~\ref{ass:mnar_my_assumption}, \ref{as:complete2}, and~\ref{ass:missing_M_mnar_K_assumption}, for every \( \alpha > 1 \), the following regret bound holds for sufficiently large \( T \):
\[
    \mathbb{E}[R_T] = O\left( \sqrt{\alpha T \log(T) n S} \right),
\]
where \( S_a = \max 
    \left(
    \left( 32 \frac{C_a}{\norm{\Theta_a}} \frac{K}{\lambda_a} + \frac{2}{\lambda_a} \right), 
    \sqrt{\sum\limits_{m \in \mathbb{M}} \frac{32 p_{m, a}}{\lambda_{m, a} \gamma_{m, a}}}
    \right) 
    \)
    and
    \(
    S = \frac{\sum\limits_{a} S_a^2}{n}
    \).
\end{restatable}

\begin{proof}
    The proof closely follows the reasoning from Theorem~\ref{theo:mnar_upper}. Let:
    \begin{align*}
        &b_a = [\mathbb{P}(y, O^M = 0, O^Y = 1 \mid a)]_{L \times 1}, \\
        &\Theta_a = [\mathbb{P}(m, y, O^M = 1, O^Y = 1 \mid a)]_{K \times L}, \\
        &x_a = \left[\frac{\mathbb{P}(O^M = 0 \mid m, a)}{\mathbb{P}(O^M = 1 \mid m, a)}\right]_{K \times 1}.
    \end{align*}
    We know that \( \Theta_a x_a = b_a \). Using the same approach as in Theorem~\ref{theo:mnar_upper}, we derive the following inequality for \( \epsilon = \sqrt{\frac{\alpha \log(T)}{2T_a}} \):
    \[
        \norm{x_a - \hat{x}_a} \leq 2 \epsilon \norm{\Theta_a^{-1}} \left(1 + K \frac{1 - \lambda_a}{\lambda_a}\right) \leq 2 \epsilon \norm{\Theta_a^{-1}} \frac{K}{\lambda_a} \leq 4 \epsilon \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\lambda_a} \leq 8 \epsilon \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a}.
    \]
    Additionally, since \( x_a = \left[\frac{1 - \lambda_{m, a}}{\lambda_{m, a}}\right]_{K \times 1} \), we have:
    \[
        \left|\frac{1}{\lambda_{m, a}} - \frac{1}{\hat{\lambda}_{m, a}}\right| \leq 8 \epsilon \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a}.
    \]
    
    Furthermore, for \( p_{m, 1 \mid a} = \mathbb{P}(M = m, O^M = 1 \mid a) \), we have:
    \[
        \left| p_{m, 1 \mid a} - \hat{p}_{m, 1 \mid a} \right| \leq \epsilon.
    \]
    Using a similar approach to the proof of Theorem~\ref{theo:mnar_upper}, we obtain:
    \[
        \left|\frac{p_{m, 1 \mid a}}{\lambda_{m, a}} - \frac{\hat{p}_{m, 1 \mid a}}{\hat{\lambda}_{m, a}}\right| \leq p_{m, 1 \mid a} \left( 8 \epsilon \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} \right) + \frac{1}{\hat{\lambda}_a} \epsilon = \epsilon \left( 8 p_{m, 1 \mid a} \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right).
    \]
    
    Therefore, we can conclude:
    \[
    \left| \hat{p}_{m, a} - p_{m, a} \right| \leq \epsilon \left( 8 p_{m, 1 \mid a} \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right).
    \]
    
    Additionally, letting \( T_{a, o_M, o_Y} \) denote the number of times both \( M \) and \( Y \) are observed when pulling arm \( a \), we have the following bound:
    \[
    \left| \hat{\mu}_{m, a} - \mu_{m, a} \right| \leq \sqrt{\frac{\alpha \log(T)}{T_{a, o_M, o_Y}}}.
    \]
    Using \( \mathbb{P}(O^Y = 1, O^M = 1 \mid a) = \sum\limits_{m \in \mathbb{M}} p_{m, a} \gamma_{m, a} \lambda_{m, a} \), we have:
    \[
    \left| \frac{T_{a, o_M, o_Y}}{T_a} - \sum\limits_{m \in \mathbb{M}} p_{m, a} \gamma_{m, a} \lambda_{m, a} \right| \leq \sqrt{\frac{\alpha \log(T)}{T_a}},
    \]
    which gives \( T_{a, o_M, o_Y} \geq \frac{T_a}{2} \left( \sum\limits_{m \in \mathbb{M}} p_{m, a} \gamma_{m, a} \lambda_{m, a} \right) \). Thus, for sufficiently large \( T \) and \( T_a \geq \log(T)^2 \), we have \( \hat{\mu}_{m, a} \leq 2 \mu_{m, a} \leq 2 \).
    
    Therefore:
    \begin{align}
    \left| \sum\limits_{m \in \mathbb{M}} \hat{p}_{m, a} \hat{\mu}_{m, a} - \mu_a \right| 
    &= \left| \sum\limits_{m \in \mathbb{M}} (\hat{p}_{m, a} - p_{m, a}) \hat{\mu}_{m, a} + \sum\limits_{m \in \mathbb{M}} p_{m, a} \hat{\mu}_{m, a} - \mu_a \right| \\
    &\leq \sum\limits_{m \in \mathbb{M}} \left| \hat{p}_{m, a} - p_{m, a} \right| \hat{\mu}_{m, a} + \left| \sum\limits_{m \in \mathbb{M}} p_{m, a} \hat{\mu}_{m, a} - \mu_a \right| \\
    &\leq 2 \sum\limits_{m \in \mathbb{M}} \epsilon \left( 8 p_{m, 1 \mid a} \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right) + \left| \sum\limits_{m \in \mathbb{M}} p_{m, a} \hat{\mu}_{m, a} - \mu_a \right| \quad \text{(using \( \sum\limits_{m \in \mathbb{M}} p_{m, 1 \mid a} \leq 1 \))} \\
    &\leq 2 \epsilon \left( 8 \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right) + \left| \sum\limits_{m \in \mathbb{M}} p_{m, a} \hat{\mu}_{m, a} - \mu_a \right|.
    \end{align}
    
    Using the same technique as before, we have the following inequality for \( T_{a, m, o_Y} \) (the number of times \( M = m \) and the reward are observed when pulling arm \( a \)):
    \[
    \left| \sum\limits_{m \in \mathbb{M}} p_{m, a} \hat{\mu}_{m, a} - \mu_a \right| \leq \sqrt{\frac{\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{4 \hat{p}_{m, a}^2}{T_{a, m, o_Y}}}.
    \]
    
    Therefore:
    \begin{align*}
    \left| \sum\limits_{m \in \mathbb{M}} \hat{p}_{m, a} \hat{\mu}_{m, a} - \mu_a \right| 
    &\leq 2 \epsilon \left( 8 \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right) + \sqrt{\frac{\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{4 \hat{p}_{m, a}^2}{T_{a, m, o_Y}}}
    \end{align*}
    which proves our UCB upper bound.
    
    Similarly, we know that \( T_{a, m, o_Y} \geq \frac{T_a}{2} p_{m, a} \lambda_{m, a} \gamma_{m, a} \), which gives:
    \begin{align*}
    |\mu_a - \hat{\mu}_a| 
    &\leq 2 \epsilon \left( 8 \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right) + \sqrt{\frac{\alpha \log(T)}{2} \sum\limits_{m \in \mathbb{M}} \frac{32 p_{m, a}^2}{T_a p_{m, a} \lambda_{m, a} \gamma_{m, a}}} \\
    &= 2 \epsilon \left( 8 \frac{C_a}{\norm{\hat{\Theta}_a}} \frac{K}{\hat{\lambda}_a} + \frac{1}{\hat{\lambda}_a} \right) + \epsilon \sqrt{\sum\limits_{m \in \mathbb{M}} \frac{32 p_{m, a}}{\lambda_{m, a} \gamma_{m, a}}} \\
    &\leq 3\epsilon \max 
    \left(
    \left( 32 \frac{C_a}{\norm{\Theta_a}} \frac{K}{\lambda_a} + \frac{2}{\lambda_a} \right), 
    \sqrt{\sum\limits_{m \in \mathbb{M}} \frac{32 p_{m, a}}{\lambda_{m, a} \gamma_{m, a}}}
    \right).
    \end{align*}
    
    Following the same reasoning as in previous proofs, we conclude:
    \[
    \mathbb{E}[R_T] = O\left( \sqrt{\alpha T \log(T) \sum\limits_{a} S_a^2} \right) = O\left( \sqrt{\alpha T \log(T) n S} \right).
    \]
\end{proof}
