% \documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams


%======================================================================
% packages and commands added by us
\usepackage{hyperref}
\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{amssymb}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{mathrsfs}
\usepackage{multirow} 
\usepackage[capitalize,noabbrev]{cleveref}
% Paired delimiters (mathtools): \ceil{x} and \floor{x}; the starred forms
% (\ceil*{x}, \floor*{x}) size the delimiters automatically.
\DeclarePairedDelimiter\ceil{\lceil}{\rceil}
\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
% Operators declared with the starred form, so that sub/superscripts are
% placed as limits in display math.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
% The symbol c with an upright ``min'' label as subscript.
\newcommand*{\cmin}{c_{\mathrm{min}}}

\usepackage{amsthm}

% Theorem-like environments (amsthm).  Every environment below is declared
% with the optional argument [theorem], i.e. they all share the counter of
% the `theorem' environment.
% \theoremstyle applies to the \newtheorem declarations that follow it, so
% the order of the lines below matters.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
% The switch to the `remark' style is disabled, so `remark' and `fact' are
% declared under the `definition' style that is still active.
% \theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{fact}[theorem]{Fact}

\usepackage{natbib}

\usepackage{xcolor}
\usepackage{soul}
% Author-annotation macros: typeset the argument in a per-author colour
% (\cjq: blue, \ngy: cyan).
% \textcolor limits the colour change to its argument, so no explicit switch
% back to black is needed and no stray spaces are inserted around the
% argument when the macro is used in running text.
\newcommand{\cjq}[1]{\textcolor{blue}{#1}}
\newcommand{\ngy}[1]{\textcolor{cyan}{#1}}

%======================================================================



% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr}
\makeatletter
% \addFileDependency{<file name including extension>}
% Announces <file> in the log and in the file list so that latexmk can pick
% it up as a dependency of this document (see the notes on each step below).
\newcommand*{\addFileDependency}[1]{%
  % Log entry that latexmk will find if $recorder=0.  In that case it
  % ignores the file if it is a .aux/.pdf (etc.) file that already exists;
  % if the file does not exist it is listed as a dependent regardless.
  \typeout{(#1)}%
  % Makes the file appear in the \listfiles output.  Not strictly
  % necessary; latexmk does not use this.
  \@addtofilelist{#1}%
  % Message that latexmk will find if the file does not exist (yet).
  \IfFileExists{#1}{}{\typeout{No file #1.}}%
}
\makeatother

% \myexternaldocument{<basename>}
% Passes <basename> to \externaldocument (package xr) and registers both
% <basename>.tex and <basename>.aux through \addFileDependency.
\newcommand*{\myexternaldocument}[1]{%
  \externaldocument{#1}%
  \addFileDependency{#1.tex}%
  \addFileDependency{#1.aux}%
}
%------------End of helper code--------------
 
\myexternaldocument{nie_646}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% Example macro: \swap[<sep>]{<a>}{<b>} expands to <b><sep><a>;
% the optional separator <sep> defaults to ``-''.
\newcommand{\swap}[3][-]{%
  #3#1#2%
}

\title{Size-Constrained k-Submodular Maximization in Near-Linear Time\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<nieg@iastate.edu>?Subject=Your k-submodular UAI 2023 paper}{Guanyu Nie}{}}
\author[1]{\href{mailto:<yanhui@iastate.edu>?Subject=Your k-submodular UAI 2023 paper}{Yanhui Zhu}{}}
\author[1]{\href{mailto:<yididiya@iastate.edu>?Subject=Your k-submodular UAI 2023 paper}{Yididiya Y. Nadew}{}}
\author[1]{\href{mailto:<sbasu@iastate.edu>?Subject=Your k-submodular UAI 2023 paper}{Samik Basu}{}}
\author[1]{\href{mailto:<pavan@iastate.edu>?Subject=Your k-submodular UAI 2023 paper}{A. Pavan}{}}
\author[1]{\href{mailto:<cjquinn@iastate.edu>?Subject=Your k-submodular UAI 2023 paper}{Christopher John Quinn}{}}
% Add affiliations after the authors
\affil[1]{%
    Computer Science Department\\
    Iowa State University\\
    Ames, IA, USA
}
  
  \begin{document}
  
\onecolumn %% Turn this off if single column is desired for the supplement
\maketitle


\appendix

\section{Threshold Greedy -- Individual Size Constraints} \label{supp:is}

In this section, we will prove \cref{thm:main2}.  We first recall the statement.

% \input{sections/alg-is.tex}

% \cref{alg:k-sub-ts}


\paragraph{\cref{thm:main2}:} \cref{alg:k-sub-is} runs in $O(kn\varepsilon^{-1}\log (B\varepsilon^{-1}))$ and guarantees a $(1/3-\varepsilon)$-approximation.

% \paragraph{\cref{thm:main2}:} %\label{thm:k-sub-is}
%     \cref{alg:k-sub-is} runs in $O(kn\varepsilon^{-1}\log (B\varepsilon^{-1}))$ and guarantees a $(1/3-\varepsilon)$-approximation.
% % \end{theorem}

\begin{proof} We first prove the run-time and then the approximation ratio.

% \cjq{maybe we should discuss query complexity instead of run-time, then mention run-time dominated by value queries}

\paragraph{Run-time:} The \textbf{for} loop runs over all item-type pairs that could  feasibly be added to $S$, thus taking $\mathcal{O}(nk)$ time each call. The number of times the outer \textbf{while} loop is called is equal to the smallest integer $t'$ such that $(1-\varepsilon)^{t'} d \leq \frac{(1-\varepsilon)\varepsilon d}{3B}$.  Let $t$ denote the value where equality holds, so $t' = \lceil t \rceil$.  Rearranging, $t$ satisfies    
\begin{align}
    t \log (1-\varepsilon)%
    %
    &= \log (1-\varepsilon) - \log (3B\varepsilon^{-1}) \nonumber\\
    %
    \Longleftrightarrow \qquad %
    %
    t &= 1 - \frac{\log (3B\varepsilon^{-1})}{\log (1-\varepsilon)} \tag{ $\log (1-\varepsilon)<0$}\nonumber\\
        %
    &\leq 1+ \frac{\log (3B\varepsilon^{-1})}{\varepsilon} \tag{using the fact that  $\log (1-x)<-x$ for $x<1$}\nonumber\\
        %
    \Longleftrightarrow \qquad %
    %
    t' = \lceil t \rceil &\leq 2+ \frac{\log (3B\varepsilon^{-1})}{\varepsilon} . \nonumber 
\end{align}
Thus, with $\mathcal{O}(\varepsilon^{-1}\log (B\varepsilon^{-1}))$ calls of the outer \textbf{while} loop, the total run time is  $\mathcal{O}(nk\varepsilon^{-1}\log (B\varepsilon^{-1}))$.

\paragraph{Approximation Guarantee:} The analysis for \cref{alg:k-sub-is}'s performance with individual size (IS) constraints will, broadly speaking, resemble the analysis of \cref{alg:k-sub-ts}'s performance for total size (TS) constraints.  Like in the proof for \cref{thm:main1}, we will construct a sequence of feasible solutions and relate objective value differences between successive pairs of solutions to the marginal gains achieved with each item-type pair added in \cref{alg:k-sub-is}.  However, the construction will require more care as the swapping pairs must respect the constraints of each type, not simply the total cardinality bound $B$.  Note that we will reuse ``$B$'' to denote the total budget across all types, $B\gets\sum_{i=1}^k B_i$. %, as noted in Section\dots, \cjq{Define problems, then cref} 
% We recall that neither \cref{problem:IS} Problem TS nor Problem IS is a special case of the other.  

% \cjq{we use $S$ both as a generic dummy variable and as the solution output by the algorithms.  Perhaps we can use $S^\circ$ or some other decoration to distinguish the output}

% \cjq{in formal problem defs, redefine $\mathcal{S}$ based on constraint types}

% For a feasible solution $S \in \mathcal{S}$, for each type $i\in [k]$, let  $U_i(S)\gets \{e| (e,i)\in S\} $ denote the set of items that in solution $S$ is assigned type $i$.  
We will first consider the case that \cref{alg:k-sub-is} outputs a maximal solution, one that for each type $i\in[k]$ has $B_i$ elements assigned that type.  We will then consider the general case.  Also, we consider that $\mathrm{OPT}$ is also maximal, $|U_i(\mathrm{OPT})|=B_i$ for all $i\in [k]$.  This is without loss of generality as the monotonicity of $f$ implies that if there is an optimal solution with less than maximal cardinality, we can add in elements to it without a decrease in value. 

\textbf{Case 1:} The final solution $S^\circ$ satisfies $|U_i(S^\circ)|=B_i$ for all $i\in [k]$. 


% Denote $U_i(S)$ as the items in $S\in \mathscr{S}$ having index $i$. Denote $I_j$ as the set $I$ considered by the algorithm when selecting $j$-th element. We consider two cases.

% \textbf{Case 1:} when the final selected $S$ satisfies $|U_i(S)|=B_i$ for all $i\in [k]$. 

We reuse the notation $(e_j, i_j)$ for the $j$th item-type pair that was added to $S^\circ = \{ (e_1,i_1), \dots, (e_B,i_B)\}$ by \cref{alg:k-sub-is}. Similar to the total size case, we will again construct several sequences combining $S$ and $\mathrm{OPT}$ to show inequalities resulting in the stated approximation bound.  We next let $S_j$ denote $S$ after $j$ elements were added, so $S_j := \{ (e_1,i_1), \dots, (e_j,i_j)\}$ and we set $S_0:=\emptyset$ as the initial empty set.  Thus by construction \begin{align}
    f(S_{j+1}) - f(S_j) = f((e_{j+1},i_{j+1})|S_j) .\nonumber
\end{align}

We next index the item-type pairs in the optimal solution $\mathrm{OPT} = \{ (e_1',i_1'), \dots, (e_B',i_B')\}$. If the item $e_j$ in the $j$th pair $(e_j,i_j) \in S^\circ$ of the output is also in a pair $(e_j,i')$ in $\mathrm{OPT}$, the latter pair should have the same index. For other pairs in  $\mathrm{OPT}$, the indexing is arbitrary.  With this alignment of indices of pairs in $S^\circ$ and $\mathrm{OPT}$ that share a common item, we construct a sequence of cardinality-$B$ sets $O_0$, $O_1$, \dots, $O_B$.  We will not be able to simply swap pairs in a single position, as we did in the proof for \cref{thm:main1}, but will need to swap additional pairs to maintain feasibility with respect to all type constraints $\{B_i\}_{i=1}^k$.  Like in the TS case, we will want the beginning and end of the sequence of feasible solutions to match the optimal and output solutions, %
%
\begin{align*}
    O_0 :=\mathrm{OPT} =& \{ (e_1', i_1'), (e_2', i_2'), \dots, (e_{B-1}',i_{B-1}'), (e_B',i_B')   \} \nonumber\\
    O_B := S^\circ =& \{ (e_1, i_1), (e_2, i_2), \dots, (e_{B-1},i_{B-1}), (e_B,i_B)   \} \nonumber
\end{align*}  We will construct the sequence beginning with $O_0=\mathrm{OPT}$.  If we construct $O_{1}$ by replacing the first pair in $O_0$ with the first pair in $S^\circ$,
\begin{align*}
   \{ (e_1, i_1), (e_2', i_2'), \dots, (e_{B-1}',i_{B-1}'), (e_B',i_B')   \}, \nonumber
\end{align*}
if the types in those two pairs were different, $i_1 \neq i_1'$, then since $\mathrm{OPT}$ already had $B_{i_1}$ elements of type $i_1$, it would now have $B_{i_1}+1$ elements of type $i_1$, thus violating the constraint.  To ensure that $O_{1}$ is feasible, we first swap types for two pairs in $\mathrm{OPT}$ to make sure the first pair has type $i_1$, and then swap elements in the first pair to match $S_1$.  Let $\ell$ denote any index of any pair in $\mathrm{OPT}$ with type $i_1$ (even $\ell=1$ if we are in the safe case that the types already matched $i_1 = i_1'$; the following inequalities will still hold).  We introduce $O_{0+1/2}$ that swaps types, before swapping elements to construct $O_{1}$, %
%
\begin{align*}
    O_0 :=\mathrm{OPT} =& \{ (e_1', i_1'), (e_2', i_2'), \dots,(e_\ell', i_\ell'=i_1),\dots, (e_B',i_B')   \} \nonumber\\
    %
    \nonumber\\
    %
    O_{0+1/2} :=& \{ (e_1', \cjq{i_\ell'=i_1}), (e_2', i_2'), \dots,(e_\ell', \cjq{i_1'}),\dots, (e_B',i_B')   \} \tag{swap types of pairs $1$ and $\ell$}\\
    %
    \nonumber\\
    %    
    O_1 :=& \{ (\cjq{e_1}, \cjq{i_1}), (e_2', i_2'), \dots,(e_\ell', \cjq{i_1'}),\dots, (e_B',i_B')   \}.\tag{swap element in pair $1$}    
\end{align*} 
To construct $O_2$, we will do a similar set of swaps.  Let $h\in\{2,\dots,B\}$ denote the index of a pair in $O_1$ with type $i_2$ (the same type as the second element $(e_2,i_2)$ added greedily to $S^\circ$ has). %  
%
\begin{align*}
    O_1 :=& \{ (e_1, i_1), (e_2', i_2'), \dots,(e_\ell', i_1'),\dots, (e_h', i_h'=i_2),\dots,(e_B',i_B')   \} \nonumber\\
    %
    \nonumber\\
    %
    O_{1+1/2} :=& \{ (e_1, i_1), (e_2', \cjq{i_h'=i_2}),\dots,(e_\ell', i_1'),\dots, (e_h', \cjq{i_2'}),\dots,(e_B',i_B')   \} \tag{swap types of pairs $2$ and $h$}\\
    %
    O_2 :=& \{ (e_1, i_1), (\cjq{e_2}, \cjq{i_2}), (e_3', i_3'), \dots,(e_\ell', i_1'),\dots, (e_h', \cjq{i_2'}),\dots,(e_B',i_B')   \}.\tag{swap element in pair $2$}    
\end{align*} 

We continue in this fashion, while constructing $O_{j+1}$ for $j\in\{1,\dots,B-1\}$ looking for a pair in $O_j$ with type $i_{j+1}$ (i.e. the same type as in the $(j+1)$st pair added to $S^\circ$) among indices $r\in\{j+1, \dots, B\}$.  Since in swapping we match types to align with the types in $S_{j+1}$, and $S^\circ$ was feasible, there will always be such an index $r$. As noted, if the types are already aligned in position $j+1$ in $O_j$, and $r$ is chosen as $j+1$, then $O_j = O_{j+1/2}$ and the following work will still hold (some inequalities will be loose).

Note that by construction, for $j\in\{0,\dots,B-1\}$ we have $S_j \subseteq O_j \cap O_{j+1/2} \cap O_{j+1}$. 

We now consider the difference $f(O_j) - f(O_{j+1})$.  This is not a marginal gain since neither set contains the other.  However, since the sets differ in the $(j+1)$st index and possibly one more index $r\in\{j+1,\dots,B\}$, we will be able to upper bound the difference in terms of the marginal gain $f(S_{j+1}) - f(S_j) $ achieved by \cref{alg:k-sub-is} in adding the $(j+1)$st element $(e_{j+1},i_{j+1})$. 


Let $r\in\{j+1,\dots,B\}$ denote the index of the pair in $O_j$ that we swapped types with the $(j+1)$st pair.

% If $r=j+1$, then the type in the $(j+1)$st pair of $O_j$ matched that of the greedy solution $S^\circ$, then $O_j= O_{j+1/2}$ so trivially $f(O_j) - f(O_{j+1/2}) = 0$. 

As types may have been changed in positions $j+1$ and/or $r$ multiple times due to previous swaps while constructing $\{O_1,\dots,O_j\}$, let $(e_{j+1}',\tilde{i}_{j+1})$ and $(e_r',\tilde{i}_r)$ denote the pairs in those positions in $O_j$.  Those pairs have the same items as in $O_0=\mathrm{OPT}$ but the types may differ due to previous swaps.  In general, we will have
\begin{align}
    f(O_j) - f(O_{j+1}) &= \big( f(O_j\cap O_{j+1}) + f( \{(e_{j+1}',\tilde{i}_{j+1}), (e_r',\tilde{i}_r)   \} | O_j\cap O_{j+1} ) \big) \nonumber\\
    %
    &\qquad - \big( f(O_j\cap O_{j+1}) + f( \{(e_{j+1},i_{j+1}), (e_r',\tilde{i}_{j+1})   \} | O_j\cap O_{j+1} ) \big)   \tag{def. of marginal gains}\\
    %
    %
    &=f( \{(e_{j+1}',\tilde{i}_{j+1}), (e_r',\tilde{i}_r)   \} | O_j\cap O_{j+1} )  - f( \{(e_{j+1},i_{j+1}), (e_r',\tilde{i}_{j+1})   \} | O_j\cap O_{j+1} )  \tag{cancel common terms}\\
    %
    %
    &\leq f( \{(e_{j+1}',\tilde{i}_{j+1}), (e_r',\tilde{i}_r)   \} | O_j\cap O_{j+1} ) \tag{by monotonicity marginal gains are non-negative}\\
    %
    %
    &\leq f( (e_{j+1}',\tilde{i}_{j+1}) | O_j\cap O_{j+1} ) %
    + f( (e_r',\tilde{i}_r) | O_j\cap O_{j+1} ) \tag{by submodularity}\\
    %
    %
   &\leq f( (e_{j+1}',\tilde{i}_{j+1}) | S_j ) + f( (e_r',\tilde{i}_r) | S_j ), \label{eq:diff:o}
\end{align}
where the last inequality follows by submodularity and $S_j\subseteq O_j\cap O_{j+1}$. We next determine that both $(e_{j+1}',\tilde{i}_{j+1})$ and  $(e_r',\tilde{i}_r)$ were feasible pairs to add to $S_j$ in \cref{alg:k-sub-is} when $(e_{j+1},i_{j+1})$ was selected.  Considering each of those terms, we note that by construction, we aligned indices of pairs in $\mathrm{OPT}$ that included items that were in pairs in $S^\circ$.  Thus, we have that $e_{j+1}' \notin U(S_j)$ and $e_r' \notin U(S_j)$, meaning that both elements were still available for \cref{alg:k-sub-is} to pick.  Additionally, by construction (since we swap to align types between the $(j+1)$st pairs) neither type $\tilde{i}_{j+1}$ nor $\tilde{i}_r = i_{j+1}$ had been exhausted when \cref{alg:k-sub-is} selected $(e_{j+1},i_{j+1})$ to add to $S_j$.  Thus, since \cref{alg:k-sub-is} picked $(e_{j+1},i_{j+1})$ instead of either $(e_{j+1}',\tilde{i}_{j+1})$ or  $(e_r',\tilde{i}_r)$, the corresponding marginal gains could not have been much larger than that of $(e_{j+1},i_{j+1})$.  Namely, with $\tau_{j+1}$ denoting the threshold when $(e_{j+1},i_{j+1})$ was selected by \cref{alg:k-sub-is}, we have
\begin{align*}
    f((e_{j+1},i_{j+1})|S_j) \geq \tau_{j+1}
\end{align*}
and since the other two pairs were not selected in the previous round when the threshold was $(1-\varepsilon)^{-1}\tau_{j+1}$ (or, if $\tau_{j+1}=d$, the maximum marginal gain, then they are at most equal and the following still holds),
\begin{align*}  
    f((e_{j+1}', \tilde{i}_{j+1})|S_j) \leq (1-\varepsilon)^{-1} f((e_{j+1},i_{j+1})|S_j) 
\end{align*} and
\begin{align*}
    f((e_r', \tilde{i}_r)|S_j) \leq (1-\varepsilon)^{-1} f((e_{j+1},i_{j+1})|S_j)
\end{align*}

This allows us to continue \eqref{eq:diff:o}:
\begin{align}
    f(O_j) - f(O_{j+1}) %
    %
    % &\leq f( \{(e_{j+1}',\tilde{i}_{j+1})   \} | O_j\cap O_{j+1} ) \big) %     + f( \{ (e_r',\tilde{i}_r)   \} | O_j\cap O_{j+1} ) \big) \tag{by \cjq{eqref} }\\
    %
    %
    &\leq f( (e_{j+1}',\tilde{i}_{j+1}) | S_j ) + f( (e_r',\tilde{i}_r) | S_j )  \tag{by \eqref{eq:diff:o}} \\
    %
    %
    &\leq 2(1-\varepsilon)^{-1} f((e_{j+1},i_{j+1})|S_j). \label{eq:prf:IS:OtoSbnd:supp}
\end{align}%
%
Using this relation, we can now lower bound $f(S^\circ)$. %
%
\begin{align}
    f(\mathrm{OPT})-f(S^\circ)&=\sum_{j=0}^{B-1}\big(f(O_j)-f(O_{j+1})\big) \tag{telescoping sum}\\
    %
    &\leq \sum_{j=0}^{B-1} \frac{2}{1-\varepsilon}(f(S_{j+1})-f(S_j)) \tag{by \eqref{eq:prf:IS:OtoSbnd:supp}}\\
    %
    &=\frac{2}{1-\varepsilon}(f(S^\circ)-f(\emptyset)) \nonumber\\
    %
    &\leq \frac{2}{1-\varepsilon}f(S^\circ) \nonumber
\end{align}
which for $\varepsilon<1$ implies 
\begin{align}
    f(S^\circ) %
    %
    &\geq \frac{1-\varepsilon}{3-\varepsilon}f(\mathrm{OPT}) \nonumber\\
    %
    &\geq (\frac{1}{3}-\varepsilon)f(\mathrm{OPT}). \label{eq:prf:IS:case1:Sbnd}
\end{align} % for $\varepsilon<1$.

% \vspace{1cm}
\textbf{Case 2:} The final solution $S^\circ$ satisfies $|U_i(S^\circ)| < B_i$ for some $i\in [k]$.  Let $\ell_i=|U_i(S^\circ)|\leq B_i$ denote the number of items with type $i$ added.  Let $\tilde{S}$ denote a set of cardinality $B$ that \cref{alg:k-sub-is} would have selected if \cref{alg:k-sub-is} terminated only when either (a) $B_i$ pairs had been selected for every type $i$ or (b) the marginal gains on all remaining elements evaluated as zero. Without loss of generality, we only consider (a), as (b) would imply $f(\tilde{S}) = f(\mathrm{OPT})$ and subsequently the same bounds as we will show for (a).  Thus, by construction $S^\circ \subset \tilde{S}$ and $\tilde{S}$ has $\sum_{i \in [k]}(B_i-\ell_i)$ extra elements.

First, since $\tilde{S}$ has $B$ elements selected according to decreasing thresholds, the result \eqref{eq:prf:IS:case1:Sbnd} from \textbf{Case 1} holds for $\tilde{S}$, that for $\varepsilon<1$, %
%
\begin{align}
    f(\tilde{S}) &\geq  \frac{1-\varepsilon}{3-\varepsilon}f(\mathrm{OPT}). \label{eq:prf:IS:case2:augS}
\end{align} %
%
Second, since $S^\circ$ only accumulated $\sum_{i\in [k]}\ell_i$ elements before the terminal threshold bound of $\frac{(1-\varepsilon)\varepsilon d}{3B}$ was reached, the marginal gains of the remaining $\sum_{i \in [k]}(B_i-\ell_i)$ elements in $\tilde{S}$ can be bounded.  The largest possible value of the threshold $\tau$ in the last execution of the \textbf{while} loop is \begin{align*}
    (1-\varepsilon)^{-1}  \frac{(1-\varepsilon) \varepsilon d}{3B} = \frac{\varepsilon d}{3B},
\end{align*} which leads to
\begin{align}
    f(\tilde{S}) - f(S^\circ) %
    %
    &\leq \sum_{(e,i) \in \tilde{S} \backslash S^\circ} f((e,i)|S^\circ) \tag{using Lemma~\ref{lem:1} in main paper} \\
    %
    &\leq \sum_{(e,i) \in \tilde{S} \backslash S^\circ} \frac{\varepsilon d}{3B} \nonumber\\
    %
    &= \sum_{i \in [k]}(B_i-\ell_i) \frac{\varepsilon d}{3B} \nonumber\\
    &\leq \frac{\varepsilon d}{3} \tag{$\sum_{i \in [k]}(B_i-\ell_i)\leq B$}\\
    %
    \Longleftrightarrow \qquad %
    %
    f(S^\circ) &\geq f(\tilde{S}) - \frac{\varepsilon d}{3}. \label{eq:prf:IS:case2:augtoS}
\end{align}
We note that since by construction $S^\circ\subset\tilde{S}$, each of the item-type pairs in $\tilde{S} \backslash S^\circ$ must have items not in $U(S^\circ)$, so the marginal gains in the formulas above are well-defined.

Combining \eqref{eq:prf:IS:case2:augS} and \eqref{eq:prf:IS:case2:augtoS}, %
%
\begin{align}
    f(S^\circ) %
    %
    &\geq f(\tilde{S}) - \frac{\varepsilon d}{3} \tag{by \eqref{eq:prf:IS:case2:augtoS}} \\
    %
    %
    &\geq \frac{1-\varepsilon}{3-\varepsilon}f(\mathrm{OPT}) - \frac{\varepsilon d}{3} 
     \tag{by \eqref{eq:prf:IS:case2:augS}} \\
        %
    &\geq \frac{1-\varepsilon}{3-\varepsilon}f(\mathrm{OPT}) -  \frac{\varepsilon f(\mathrm{OPT})}{3} 
     \tag{by submodularity and choice of $d$}\\
     %
     &\geq (\frac{1}{3}-\varepsilon)f(\mathrm{OPT}). \nonumber %\label{eq:prf:IS:case2:combinedbnd}
\end{align}

\end{proof}

\section{Other Related Works Not Considered as Baseline}
There are some related works mentioned in \cref{tab:related-work} but not considered as baselines. We provide detailed reasons and discussions here:

\begin{itemize}
    \item \citep{qian2017constrained}: The authors propose an evolutionary algorithm. They only proposed and analyzed the algorithm for total size constraints, not for individual size constraints. We note that the algorithm runs continuously; the run time shown in our \cref{tab:related-work} is only the expected time for the algorithm to obtain the desired approximation guarantee of $1/2$. Their code is publicly available, though it was implemented for sensor placement experiments using a small portion of the data. We modified it to run for the whole data set, but those experiments are slow compared to other methods. One fundamental reason is that the evolutionary algorithm cannot incorporate lazy evaluation. This is significant as in the example of sensor placement with $k=3$ and a total budget of 36, the greedy algorithm without lazy evaluation would require 3,888 function evaluations (using lazy evaluation, only 1,627 were required as shown in \cref{fig:TS:eval}). Also, the mutation process takes $\mathcal{O}(n)$ time even if the mutation is not accepted. While all the algorithms considered in the paper run within minutes, the evolutionary algorithm did not finish a case for total budget $B=5$ within 2 hours. As the main purpose of the paper is to improve the time complexity of existing algorithms, and the run time of the evolutionary algorithm is much worse than that of the greedy algorithm (even without lazy evaluation), we do not include it.
    \item \citep{ene2022streaming}: This paper considers the streaming setting, where base elements arrive one at a time in an arbitrary (adversarial) order. We did not think it would be fair to compare it to offline methods like our threshold greedy method, the stochastic greedy, or greedy methods.
    \item \citep{matsuoka2021maximization}: This paper does not propose a new algorithm. The authors analyze the greedy algorithm (which we include in experiments). The authors prove the greedy algorithm achieves a better approximation ratio for the sub-class of $k$-submodular functions with bounded curvature.
\end{itemize}




\bibliography{refs.bib}

\end{document}
