\section{Monotone Submodular Maximization}
\label{sec:monotone}
In this section, we address the MSMC problem under the noisy setting, where we assume the noisy sampling of the marginal gain ${\Delta f}(S,s)$ is $R$-sub-Gaussian for any $S\subseteq U$ and $s\in U$. 
Necessary definitions and notations are first given in Section~\ref{sec:prelim}. We propose two algorithms, \alglong (\alg) and \alglongmono (\algmono), for this problem. A detailed description of \alg is given in Section~\ref{sect:alg}. The approximation and sample
complexity guarantees of \alg and \algmono are presented in Theorem \ref{mainthm} and Theorem \ref{thm:monotone2} in Section \ref{sect:analysis}. For \algmono, the algorithm description and pseudocode are provided in Section \ref{appdx:mono2} of the appendix.


% \textcolor{red}{TODO: It looks a little odd to only have subsections for this one algorithm, probably would be better to remove them for consistency.}
\subsection{Algorithm description of \alg}
\label{sect:alg}

Here we describe \alglong (\alg). \alg is based on the algorithm \thresholdlong (\threshold) of \citet{badanidiyuru2014fast} which is for \prob with an exact value oracle. Pseudocode for \alg can be found in Algorithm \ref{alg:ATG}. 

The algorithm \alg takes as input parameters $\epsilon, \delta, \alpha \in (0,1)$. 
% At all times throughout \alg, there is a marginal gain threshold value $w$ (that decreases over time) and a partial solution $S$ (to which elements are iteratively added). 
\alg proceeds in $O(\log(\kappa/\alpha)/\alpha)$ \textit{rounds}, where each round corresponds to a value of the marginal gain threshold $w$. The threshold $w$ is first set to $d$, which is an $\epsilon$-additive approximation of the maximum singleton value with high probability. In particular, with probability at least $1-\delta/3$, $d$ satisfies $\max_{s\in U}f(s)+\epsilon\geq d\geq \max_{s\in U}f(s)-\epsilon$. During each round, \alg iterates through all elements in $U$. Since for each $S$ and $u$, the noisy query to the marginal gain ${\Delta f}(S,u)$ is $R$-sub-Gaussian, \alg can use \samp as a subroutine to determine whether to add $u$ to the solution set $S$. 
% More specifically, \alg calls the procedure \samplong (\samp) with threshold $w$, approximation error bound $\epsilon$, error probability $\frac{2\delta}{3nh(\alpha)}$ where $h(\alpha)=\frac{\log{(\kappa/\alpha)}}{\alpha}$, random distribution $\mathcal{D}(S,u)$, and sub-Gaussian parameter $R$ as input. 
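Each call to \samp in Algorithm~\ref{alg:ATG} uses error probability $\frac{2\delta}{3nh(\alpha)}$, where $h(\alpha)=\frac{\log{(\kappa/\alpha)}}{\alpha}$. 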
The worst-case query complexity $N_1$ and confidence interval $C_t$ in \samp are defined as in Theorem \ref{thm:sampling}. 
% At the end of each round, $w$ is decreased by a factor of $1-\alpha$. \threshold completes once $w$ reaches $\alpha d/\kappa$, or if $S$ has reached the cardinality constraint $\kappa$, whichever comes first. 

% \alg does not have access to a value oracle for $f$, but instead for any $S\subseteq U$ and $u\in U$ can take noisy samples of the marginal gain $\Delta f(S,u)$ via $\mathcal{D}(S,u)$, where $\mathcal{D}(S,u)$ has the properties described in Section \ref{sec:prelim}. \alg takes as input parameters $\epsilon,\alpha,\delta\in (0,1)$. $\alpha$ is analogous to that in \threshold, and controls by how much the threshold decreases at each round. $\delta$ is the probability that the theoretical guarantees will hold if we run \alg. $\epsilon$ is how closely we approximate the true function value in the worst case.[TODO: maybe some definitions of parameters can be described in the preliminary section]

 % This is accomplished by taking $N_2=2R^2\log(6n/\delta)/(\epsilon^2)$ samples from $\mathcal{D}(\emptyset,u)$ for all $u\in U$ and choosing the highest sample average marginal gain. 

% In a similar manner to \threshold, \alg proceeds in rounds corresponding to thresholds $w$ and passes through the universe $U$. But instead of simply checking whether each $u\in U$ has marginal gain $\Delta f(S,u)\geq w$, \alg calls the procedure \samplong (\samp) with threshold $w$, approximation error bound $\epsilon$, error probability $\frac{2\delta}{3nh(\alpha)}$ where $h(\alpha)=\frac{\log{(\kappa/\alpha)}}{\alpha}$, random distribution $\mathcal{D}(S,u)$, and sub-Gaussian parameter $R$ as input. 
% The main results of this algorithm are two-fold. With probability $1-\delta$, the following two conclusions hold:
% \begin{itemize}
%     \item  The value of function $f$ of the output set $S$ satisfies that $f(S)\geq (1-e^{-1}-\epsilon)f(OPT)$.
%     \item  The number of queries the algorithm takes is upper bounded by $O(N_2+...)$ where $N_2$ is the number of queries required by line \ref{alg:max_sing step} in Algorithm \ref{alg:ATG}.
% \end{itemize}
% The algorithm is composed of two parts: in the first part, we
% calculates the marginal gain of adding different elements for multiple iterations. 
%At each iteration, the algorithm approximates the marginal gain of adding each element $s$ in the universe to the current solution $S$ by invoking TAMG($w$, $\epsilon$, $\delta$, $S$, $s$) (see the subroutine Algorithm \ref{alg:TAMG}). In TAMG($w$, $\epsilon$, $\delta$, $S$, $s$), the algorithm adds the element if $\Delta f(S,s)>w-\epsilon$. Notice that by setting the confidence interval to $\epsilon$ in the Hoeffding's inequality, we can easily obtain the required number of samples to achieve the objective. However, this approach is highly inefficient and we develop an adaptive sampling method. For each time step $t$, we apply the Hoeffding inequality and we know that the sampled average $\deltafe$ can be bounded in the range of $[\Delta f(S,s)-C_t,\Delta f(S,s)+C_t]$. Let us denote $UCB(S,s)=\Delta f(S,s)+C_t$, and that $LCB(S,s)=\Delta f(S,s)-C_t$. Intuitively, if the $LCB(S,s)$ is higher than $w-\epsilon$, then it is added to the solution set $S$. 
\begin{algorithm}[t]
\caption{\alglong (\alg)}\label{alg:ATG}
 \begin{algorithmic}[1]
 \STATE \textbf{Input:} $\epsilon$, $\delta, \alpha$
 % \STATE  Define $\epsilon':=\frac{\epsilon}{6\kappa}$
 \STATE $N_2\gets 2R^2\log(6n/\delta)/(\epsilon^2)$
 \FORALL{$s\in U$}
 \STATE $\hat{f}(s) \gets $ sample mean over $N_2$ samples from $\mathcal{D}(\emptyset,s)$\label{alg:ATG:line:sample-mean}
 \ENDFOR
  
  \STATE $d\gets\max_{s\in U}\hat{f}(s)$
 % using BAI to approximate $\max_{s\in S}f(s)$, i.e., $(1+\epsilon)\max_{s\in U}f(s)\geq d\geq (1-\epsilon)\max_{s\in U}f(s)$.
 \STATE $w\gets d$, $S\gets \emptyset$
 \WHILE{$w>\alpha d/\kappa$}
 \FORALL{$u\in U$} 
\IF{$|S|<\kappa$}
 \STATE thre = \samplong($w$, $\epsilon$, $\frac{2\delta}{3nh(\alpha)}$, $\mathcal{D}(S,u)$, $R$)
 \IF{thre}
 \STATE $S\gets S\cup \{u\}$
 \ENDIF
  \ENDIF
 \ENDFOR

 \STATE $w\gets w(1-\alpha)$
 \ENDWHILE
 \STATE \textbf{return} $S$
 \end{algorithmic}
\end{algorithm}

% \begin{algorithm}
% \caption{\samplong (\samp)}
% \label{alg:TAMG}
% \begin{algorithmic}[1]
%     \STATE \textbf{Input:} $w$, $\epsilon$, $\delta$, $S$, $u$
%     \STATE $N_1=R^2\log(6nh(\alpha)/\delta)/(2\epsilon^2)$ where $h(\alpha)=\frac{\log{\kappa/\alpha}}{\alpha}$
%     \FOR{$t=1,2,...N_1$}\label{line:sample N_1}
%         \STATE $\widehat{\Delta f_t}(S,u)\gets$ updated sample mean after taking $t$-th sample from $\mathcal{D}(S,u)$
%         \STATE update $\conf =R\sqrt{\frac{\log \frac{12nh(\alpha) t^2}{\delta}}{2t}}$ \label{alg: update confidence interval}
%  %\STATE Sample $\Delta f_t(S,s)$
%  %\STATE Calculate $\widehat{\Delta f_t}(S,s)=\frac{\sum_{i=1}^t\Delta f_i(S,s)}{t}$
%         \IF{$\widehat{\Delta f_t}(S,u)-\conf \geq w-\epsilon$} \label{line: comparison to thres 1}
%             \STATE \textbf{return true}
%         \ELSIF{$\widehat{\Delta f_t}(S,u)+\conf \leq w+\epsilon$} \label{line: comparison to thres 2}
%             \STATE \textbf{return false}
%         \ENDIF
%  \ENDFOR
%  \IF{$\widehat{\Delta f_t}(S,u)\geq w$}
%  \STATE \textbf{return true}
%  \ELSE
%  \STATE \textbf{return false}
%  \ENDIF
% \end{algorithmic}
% \end{algorithm}
