\documentclass[table, accepted]{uai2024} % `accepted': non-anonymous, post-acceptance version (see the note below)
\usepackage[dvipsnames]{xcolor}
\hypersetup{colorlinks=true, citecolor=Violet,linkcolor=Mahogany,urlcolor=blue}

\usepackage{fix-cm} % Helps in scaling fonts

% \documentclass[accepted]{uai2024} % after acceptance, for a revised version;
% also before submission to see how the non-anonymous paper would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2024} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2024} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % enables numeric citations
\bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{tikz} % nice language for creating drawings and diagrams

\usepackage[inline]{enumitem}
\newlist{mylist}{enumerate*}{1}
\setlist[mylist]{label=\textbf{(\arabic*)}}

%\usepackage[small]{caption}
%\usepackage{bbm}
%\captionsetup[table]{skip=10pt}
%\NewDocumentCommand{\codeword}{v}{
%\texttt{\textcolor{black}{#1}}
%}
\usepackage{amsmath, amssymb, amscd, amsthm, amsfonts}
% \usepackage{mathrsfs} % For script font
% \usepackage{unicode-math}
\usepackage{dutchcal} 
\usepackage{thmtools}


\usepackage{float}
\usepackage{multirow}
\usepackage{caption}
\usepackage{wrapfig}

% \usepackage[dvipsnames]{xcolor}

%\usepackage[usenames, dvipsnames, table]{xcolor}
\newtheorem{theorem}{Theorem}
\newtheorem{assumption}{Assumption}
\newtheorem{lemma}{Lemma}
\newtheorem{lemmma}{Lemma}
\newtheorem{proposition}{Proposition}
\newtheorem{vignette}{Vignette}
\newtheorem{definition}{Definition}


% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{graphicx}
\usepackage{subfigure}
\usepackage{booktabs} % for professional tables

% hyperref makes hyperlinks in the resulting PDF.
% If your build breaks (sometimes temporarily if a hyperlink spans a page)
% please comment out the following usepackage line and replace
% \usepackage{icml2024} with \usepackage[nohyperref]{icml2024} above.
% \usepackage{hyperref}
% \usepackage[bookmarks, colorlinks=true, citecolor=Violet,linkcolor=Mahogany,urlcolor=blue]{hyperref}

\newcommand{\ljr}[1]{\textcolor{red}{#1}}
% Attempt to make hyperref and algorithmic work together better:
\newcommand{\theHalgorithm}{\arabic{algorithm}}

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}

% if you use cleveref..
\usepackage[capitalize,noabbrev]{cleveref}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

% Todonotes is useful during development; simply uncomment the next line
%    and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}

\usepackage{algorithm, algorithmic}
%\usepackage[bookmarks, colorlinks=true, citecolor=Violet,linkcolor=Mahogany,urlcolor=blue]{hyperref}

% Recommended, but optional, packages for figures and better typesetting:
\usepackage{subfigure}

%


%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Efficient Interactive Maximization of BP and Weakly Submodular Objectives}

% The standard author block has changed for UAI 2024 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:adhyyan@uw.edu}{Adhyyan Narang}{}}
\author[2]{Omid Sadeghi}
\author[1]{Lillian Ratliff}
\author[1]{Maryam Fazel}
\author[1]{Jeff Bilmes}
% Add affiliations after the authors
\affil[1]{%
   Electrical and Computer Engineering\\
   University of Washington\\
   Seattle, Washington, USA
}
\affil[2]{
  Sloan School of Management\\
  Massachusetts Institute of Technology\\
  Boston, MA, USA 
}

% The below macros are copied in from \input{../commands.tex}
\makeatletter
\newcommand\notsotinyfortable{\@setfontsize\notsotinyfortable{6.3}{6.8}}



\newcommand{\tvar}{\ensuremath{t}}
\newcommand{\Tvar}{\ensuremath{T}}


\newcommand\notsotinyforalgorithm{} % \@setfontsize\notsotinyfortable{7.0}{7.5}}

\newcommand{\hatDelta}{\widehat{\Delta}}
\newcommand{\hatTq}{\widehat{\Tvar_q}}
\newcommand{\hatpi}{\widehat{\pi}}

\definecolor{airforceblue}{rgb}{0.36, 0.54, 0.66}
\newcommand{\an}[1]{{\color{magenta} Adhyyan: #1}}

\newcommand{\track}[1]{#1}

\newcommand{\mf}[1]{{\color{green} MF: #1}}

\newcommand{\norm}[1]{\| #1 \|}
\newcommand{\deff}{d_{\text{eff}}}

\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}
%\DeclareMathOperator{\PO}{PO}
%\DeclareMathOperator{\PR}{PR}
\newcommand{\mutildei}{\tilde{\mu_{\tvar}}}
\newcommand{\sigmatildei}{\tilde{\sigma_{\tvar}}}
\newcommand{\tildeyvect}{\mathbf{\tilde{y}}_\tvar}
\newcommand{\tildeyvecold}{\mathbf{\tilde{y}}_{\tvar-1}}
\newcommand{\Kgginv}{K_{GG}^{-1}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\Rt}{\mathcal{R}_\tvar}
\newcommand{\RT}{\mathcal{R}_\Tvar}
\newcommand{\kappaf}{\kappa_f}
\newcommand{\kappag}{\kappa^g}
\newcommand{\kappafk}{\kappa_{f, q}}
\newcommand{\kappagk}{\kappa^g_q}
\newcommand{\gammak}{\gamma_q}
\newcommand{\alphak}{\alpha_q}
\newcommand{\Sk}{S_q}
\newcommand{\Skj}{S_{j,q}}
\newcommand{\Skjnext}{S_{j+1,q}}
\newcommand{\Tk}{\Tvar_q}
\newcommand{\hk}{h_q}
\newcommand{\sj}{s_j}
\newcommand{\defect}{l}
\newcommand{\Skstar}{S_q^\ast}

\newcommand{\fone}{f_{1}}
\newcommand{\fkone}{f_{q,1}}
\newcommand{\gkone}{g_{q,1}}
\newcommand{\lkone}{l_{q}}
\newcommand{\lktwo}{l_{q,2}}
\newcommand{\lk}{l_{q}}
\newcommand{\fk}{f_{q}}
\newcommand{\gk}{g_{q}}

\newcommand{\rmjsum}{\sum_{j = 1}^{\Tk} r_{m_j}}

\newcommand{\rjsum}{\sum_{j = 1}^{k} r_{j}}

\newcommand{\rmj}{r_{m_j}}
\newcommand{\nystrom}{nystr\"om}
\newcommand{\Nystrom}{Nystr\"om}
\newcommand{\NystromSelect}{Nystr\"omSelect}

% context vector variable.
\newcommand{\ctx}{\ensuremath{\phi}}
% The set of possible contexts
\newcommand{\Ctxs}{\ensuremath{\Phi}}

\newcommand{\kernel}{\ensuremath{\mathcal{k}}}
\makeatother

\DeclareMathOperator{\argmax}{argmax}
\DeclareMathOperator{\argmin}{argmin}

% Reduce spacing before and after the algorithm environment
\setlength{\floatsep}{10pt}
\setlength{\textfloatsep}{10pt}
\setlength{\intextsep}{10pt}


\ifdefined\secminuslength
\else
  \newlength{\secminuslength}
\fi

\begin{document}
\maketitle

\begin{abstract}
In the context of online interactive machine learning with 
combinatorial objectives, we extend purely submodular prior work to 
more general non-submodular objectives.  
This includes: (1) those that are additively decomposable into a sum of
 two terms (a monotone submodular and monotone supermodular term, 
 known as a BP decomposition); 
 and (2) those that are only weakly submodular.  
 In both cases, 
 this allows representing 
 not only competitive (submodular) 
 but also complementary (supermodular) relationships between objects, 
 extending this setting to a broader range of applications 
 (e.g., movie recommendations, medical treatments, etc.) 
 where this is beneficial.  
 In the two-term case, moreover, 
 we study not only the more typical monolithic feedback approach but 
 also a novel framework where feedback is available separately for each 
 term.  
 With real-world practicality and scalability in mind, 
 we integrate \Nystrom{} sketching techniques to significantly reduce 
 the computational cost, 
 including for the purely submodular case. 
 In the Gaussian process contextual bandits setting, 
 we show sublinear theoretical regret bounds in all cases. 
 We also empirically show good applicability to recommendation systems 
 and data subset selection.
 The code for this paper is available at: 
 \url{https://github.com/AdhyyanNarang/online_bp}.
\end{abstract}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction}
\label{sec:introduction}


Many machine learning paradigms are offline, where a learner must
understand the associations and relationships within a dataset that is
gathered, fixed, and then presented.  Interactive learning, by
contrast, involves a dynamic, repeated, and potentially everlasting
interaction between the algorithm (learner) and the environment
(teacher), better mimicking natural organisms as they proceed through
life. Interactive learning is quite important for applications such as
recommender systems~\citep{mary2015bandits}, natural language and
speech processing~\citep{ouyang2022training}, interactive computer
vision~\citep{le2022deep}, advertisement
placement~\citep{schwartz2017customer}, environmental monitoring~\citep{srivastava2014surveillance},
personalized medicine~\citep{durand2018contextual}, adaptive website
optimization~\citep{white2013bandit}, and robotics~\citep{robotics_rl}, to name only a few.

\begin{table*}[t]
  \centerline{
  \notsotinyfortable{
    \begin{tabular}{l|c|c|c|c|c|}
  \cline{2-6}
  & \textbf{Offline} & \textbf{Pure Online} & \textbf{Online + \Nystrom} & \textbf{Online + Sep.\ FB} & \textbf{Online + \Nystrom{} + Sep.\ FB} \\ \hline
  \multicolumn{1}{|l|}{\textbf{Modular}} & & \cite{icml2010_srinivas, nips2011_krause} & \cite{zenati22a} & N/A & N/A \\ \hline
  \multicolumn{1}{|l|}{\textbf{SM}} & \cite{nemhauser1978analysis} & \cite{NIPS2017_krause} & \checkmark \cellcolor{green!25} & N/A & N/A \\ \hline
  \multicolumn{1}{|l|}{\textbf{BP}} & \cite{jeff_bp} & \checkmark \cellcolor{green!25} & \checkmark \cellcolor{green!25} & \checkmark \cellcolor{green!25} & \checkmark \cellcolor{green!25}\\ \hline
  \multicolumn{1}{|l|}{\textbf{WS}} & \cite{das2011submodular, bian2019guarantees} & \checkmark \cellcolor{green!25} & \checkmark \cellcolor{green!25} & N/A & N/A \\ \hline
  \end{tabular}
  }
  }
  \caption{The present paper's novelty (\colorbox{green!25}{green \checkmark} which means new algorithms for sublinear regret) in
    the context of previous work.
    Here \textbf{SM} refers to SubModular,
    \textbf{BP} to suBmodular-suPermodular, and
    \textbf{WS} to Weakly Submodular.
    \textbf{Sep.\ FB} refers to the separate feedback BP setting introduced in this paper.
    N/A means not applicable.}
  \label{tab:my_label}
\end{table*}

%Contextual bandits
The fundamental mathematical challenge in these settings is to
optimize a utility function that encapsulates the value or payoff of
different actions within a specific context.  While there are many
instances within this paradigm, including reinforcement, active,
online, and human-in-the-loop (HitL) learning, one such setting is
contextual bandits.  In contextual bandits, the agent observes a set
of features (a context vector), takes an action and then gets a
reward from the environment. The goal is to maximize the total accumulated reward over a series of
actions over time.  The context significantly influences the optimal choice
of action. For example, in movie recommendation systems, the context
might include user demographics (even a specific user), past viewing
history, time of day, and so on.

%GPCB's
Gaussian Process Contextual Bandits (GPCB) extend this basic idea by
incorporating Gaussian Processes (GPs) for modeling the unknown reward
function \citep{seeger2008information,icml2010_srinivas,
  nips2011_krause, Valko2013FiniteTimeAO, camilleri21a}.  This
approach is particularly effective in scenarios where the relationship
between the context, actions, and rewards is complex and
non-linear. For any given context $\ctx_{u_\tvar}$ (where $u_\tvar \in [m]$ is
the index of one of $m \in \mathbb{Z}_+ \cup \{\infty\}$ possible contexts at time $t$), GPs allow the
easy expression of a posterior distribution based on previous rounds
in terms of Gaussian conditional mean $\mu_{\ctx_{u_\tvar}}$ and conditional
variance $\sigma^2_{\ctx_{u_\tvar}}$ vectors where the former encodes value
and the latter encodes uncertainty.  These are combined in Upper
Confidence Bound (UCB) algorithms as
$\mu_{\ctx_{u_\tvar}}(v) + \beta_\tvar \sigma^2_{\ctx_{u_\tvar}}(v)$ to offer a combined
valuation of action $v \in V$ in the context of $\ctx_{u_\tvar}$ in terms of
exploration (high $\sigma^2_{\ctx_{u_\tvar}}(v)$) vs.\ exploitation (high
$\mu_{\ctx_{u_\tvar}}(v)$) where $\beta_\tvar$ is a computed time-dependent tradeoff
coefficient. The goal of GPCB is traditionally to minimize cumulative
regret, where the rewards at each time are compared to the best choice
at that time:
\abovedisplayshortskip=.4ex\belowdisplayshortskip=.4ex\abovedisplayskip=.4ex\belowdisplayskip=.4ex
\[
\mathcal{R}(\Tvar) = \sum_{\tvar=1}^{\Tvar} \bigl[ f_{\ctx_{u_\tvar}}(v^*_{\ctx_{u_\tvar}}) -
f_{\ctx_{u_\tvar}}(v_\tvar) \bigr],
\]
where $v^*_{\ctx_{u_\tvar}}$ is the best choice for context
$\ctx_{u_\tvar}$ and $v_\tvar$ is the algorithm's choice at time $t$. Sublinear
regret means this increases more slowly than $\Tvar$ (i.e.,
$\lim_{\Tvar \to \infty} \mathcal{R}(\Tvar)/\Tvar = 0$).

%Chen et.al combines with submodular
\citet{NIPS2017_krause} made the important observation that GPCBs can
be used for online combinatorial, specifically
submodular, 
maximization, where an input set of size $\Tvar$ is incrementally
constructed over time. Offline monotone submodular maximization is
NP-hard but a greedy algorithm has an $\alpha$ multiplicative
approximation~\citep{nemhauser1978analysis} for $\alpha = 1-1/e$.
\citet{NIPS2017_krause} utilizes $\alpha$-regret, where the regret of
the online algorithm at time $\Tvar$ is based on comparing with the
$\alpha$ approximation of the offline algorithm, specifically
\abovedisplayshortskip=.4ex\belowdisplayshortskip=.4ex\abovedisplayskip=.4ex\belowdisplayskip=.4ex
\[
\mathcal{R}(\Tvar) = \sum_{q=1}^m \bigl[ \alpha f_{\ctx_q}(S^*_q) -
f_{\ctx_q}(S_{\Tvar_q,q}) \bigr],
\]
where $f_{\ctx_q}$ is the submodular function for context $\ctx_q$ having $S^*_q$ as the optimal solution 
and $S_{\Tvar_q,q}$ is the algorithm's incrementally-computed
attempted solution both of size $|S^*_q|=|S_{\Tvar_q,q}|=T_q$, and
$T = \sum_q \Tvar_q$ where $T_q$ is the frequency of context $q$.
Thus, unlike the standard GPCB above which uses a summation of
pointwise quantities, this combinatorial $\alpha$-regret utilizes the
interdependencies between elements evaluated by the submodular
function.  These interdependencies strongly influence the best choices
at different time steps because of the submodular (i.e.,
non-independent) relationships. Another critical feature is that the
function $f$ is not available to the algorithm --- rather only noisy
gain queries $y_\tvar$ of the form
$y_\tvar = f(v|S_\tvar) + \epsilon_\tvar$ are available \emph{after}
$v$ has been committed, where $v$ is the algorithmic choice,
$S_\tvar = \{v_1,v_2, \dots, v_{t-1}\}$ constitutes the previous and
now fixed set of choices, and $\epsilon_\tvar$ is independent noise.
Compared to the offline setting, the online optimization setting
becomes significantly more mathematically challenging and requires
smoothness assumptions to achieve sublinear regret. However, the
online setting is natural for many applications.

%Limitations of that
Despite its many benefits, the purely submodular assumption
is not sufficiently expressive to capture essential properties of many
real-world environments. Consider the example of movie recommendations
--- in some cases, recommending a movie and its sequel will yield
greater rewards than recommending the two movies independently, a
complementarity (i.e., supermodularity) amongst actions. In
personalized medicine, certain combinations of medicines
might
together possess pharmacological synergy (a supermodularity) while
other combinations will be lethal (a submodularity).

\paragraph{Contributions.} 
In the present paper, we offer results that
achieve sublinear $\alpha$-regret in the GPCB setting for a variety of
non-linear non-submodular utility functions that previously have not
been considered in the online setting.

We first consider when non-submodular utility functions $h=f+g$ can be
additively decomposed into the sum of a monotone submodular $f$ and
monotone supermodular $g$ components, known as a ``BP'' function
(Definition~\ref{def:bp}). BP functions allow for a much more
expressive representation of utility, capturing both the diminishing
returns inherent in submodular and the increasing complementary
returns characteristic of supermodular functions.  \citet{jeff_bp}
introduced and studied the offline maximization of BP functions
subject to a cardinality constraint --- this was shown
to have an approximation ratio of
$\alpha = \frac{1}{\kappa_{f}} \left[1 - e^{-(1 - \kappa^g)
    \kappa_{f}}\right]$ where $\kappa_f, \kappa^g \in [0,1]$ are the
submodular and supermodular curvatures that respectively measure how
far the functions $f,g$ are from being modular (see Appendix~\ref{sec:backgr-subm-superm}). In general, this
problem is inapproximable, but if $\kappa^g<1$, it is possible to
obtain approximation ratios for this problem using the greedy
algorithm. We study this problem in the $\alpha$-regret case showing
sublinear regret. More interestingly, this decomposition enables us to
study a novel form of \emph{separate feedback} where we receive separate
rewards each for the submodular $f$ and supermodular $g$ components.
In an interactive recommender system, for example, the utility
function might represent the combined effects of user satisfaction
(submodular due to saturation of interests) and network effects
(supermodular due to the increasing value of shared community
experiences), the rewards for each of which can be available
separately. The stronger separate feedback case allows us to provide a
stronger $\alpha$-regret with
$\alpha = \min \left\{ 1 - \frac{\kappa_f}{e}, \; 1 -\kappa^g
\right\}$. This choice is inspired by~\citet{distorted_greedy}, who
proposed a distorted version of the offline greedy algorithm for BP
maximization problems and provided an improved
$\min\{1-\frac{\kappa_f}{e},1-\kappa^g \}$ approximation ratio.
See Appendix~\ref{sec:app_error} for further commentary on this approach.
\looseness-1

When $h: 2^V \to \mathbb R$ is not decomposable as with a BP function,
we next consider a monolithic $h$ that is \emph{Weakly Submodular}
(WS), defined as the following being true:
$\forall A \subseteq B \subseteq V$,
$\sum_{b \in B \setminus A} h(b|A) \geq \lambda h(B|A)$ for some
$\lambda \in [0,1]$ where $h(B|A) \triangleq h(B\cup A) - h(A)$.  The largest $\lambda$ for which $h$ is weakly
submodular is known as the submodularity
ratio $\gamma $~\citep{das2011submodular,JMLR:v19:16-534}, and $h$ is submodular
if and only if $h$ is $1$-weakly submodular.
\cite{bian2019guarantees} also introduced a generalized version of the
submodular curvature $\zeta$ for WS functions and studied the
approximation ratio of the offline greedy algorithm on such
functions. Inspired by these results, we present a sublinear regret
bound on WS functions with
$\alpha = \frac{1}{\zeta}\big(1-e^{-\zeta\gamma}\big)$.

We remark that in general, just because an offline algorithm can
achieve an $\alpha$-approximate solution to an NP-hard problem does
not guarantee that the online GPCB version can achieve sublinear
$\alpha$-regret --- it is in general quite challenging to show
sublinear $\alpha$-regret for new problems especially in the
combinatorial case when there are such dependencies between previous
and current actions.

A third contribution of our paper further addresses the main practical
computational complexity challenge with GPs, especially in
high-dimensional spaces.  The problem arises from performing
operations on Gram covariance matrices, whose size increases as the
number of observations grows.  We address this challenge, both when
$h$ is a BP function and when $h$ is a WS function, by showing the
applicability of \Nystrom{} approximations, a technique traditionally
used in kernelized learning to efficiently handle large-scale data. We
show that \Nystrom{} approximations facilitate the efficient
computation of our utility functions by approximating their components
in a lower-dimensional space. This method significantly reduces the
computational complexity from the prohibitive $O(\Tvar^3)$ to a more
manageable form, typically $O(C\Tvar N^2)$ where $N$ is substantially
smaller than $\Tvar$ and represents the number of points in the
\Nystrom{} approximation.

Lastly, in our numerical experiments, we empirically demonstrate the
above for two applications: movie recommendations and training-data
subset selection for machine learning.

\paragraph{Background and Other Related Work.} 
The above introduces this paper's 
novel contributions in the context of previous work which
Table~\ref{tab:my_label} briefly summarizes.  A very detailed
literature review is given in Appendix~\ref{app:related_work}.  We
highlight that our algorithms are inspired by the developments in
GPCBs that enable the optimization of unknown functions in
low-information online environments \citep{icml2010_srinivas,
  nips2011_krause, Valko2013FiniteTimeAO, camilleri21a}. In
particular, \citet{zenati22a} improves computational efficiency by
using \Nystrom{} points to speed up the algorithm with the same
asymptotic regret guarantee as prior work.  Additionally, there is a
line of work on ``combinatorial bandits'' that may appear similar to
our formulation at first glance~\citep{takemori20a, caramanis, kveton,
chen_context_combinatorial, Nie2023-bv, NIPS2008_5751ec3e}---these
study the computational
complexity of learning an unknown submodular function. However, the
feedback model in those papers is entirely different from ours: a new
submodular function arrives at each time step and an entire set is
recommended.  In the present work, as mentioned above, we accumulate a
selected set over time for functions that arrive repeatedly. Hence,
this body of work is not comparable to the present work.

In the following section, we begin directly with our problem formulation.  For
further background on submodularity, supermodularity, BP functions, and
various curvatures, see Appendix~\ref{sec:backgr-subm-superm}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Problem Formulation}
\label{sec:problem-formulation}

Our optimizer operates in an environment
that occurs over $\Tvar$ time steps. Specifically, at each time step
$\tvar \in [\Tvar]$:
\begin{enumerate}[leftmargin=*]
\item The optimizer encounters one of $m$ set functions from the set
  $\{h_1, \ldots, h_m\}$ each defined over the finite ground set
  $V$. The optimizer is ignorant of the function but
  knows its index $u_\tvar \in [m]$ as well
  as a context or feature-vector $\ctx_{u_\tvar}$
  for that index at round $\tvar$.

\item The optimizer computes and then performs/plays action
  $v_{\tvar} \in V$, and then adds $v_\tvar$ to its growing
  context-dependent set $S_{t_{u_\tvar},u_\tvar}$ of size
  $|S_{t_{u_\tvar},u_\tvar}|=t_{u_\tvar}$ with
  $\sum_{j \in [m]} t_j = t$. The set $S_{t_{u_\tvar},u_\tvar}$
  contains all items so far selected for the unknown function
  $h_{u_\tvar}$.

\item The environment offers the optimizer noisy marginal gain feedback. There are
  two feedback models:
  \begin{enumerate}[label=(3\alph*)]
  \item \textit{Monolithic Feedback:} The optimizer receives $y_\tvar$ with
    $y_\tvar = h_{u_\tvar} (v_\tvar| S_{t_{u_\tvar}, u_\tvar}) + \epsilon_\tvar$.
  \item \textit{Separate Feedback:} In the BP case,
    $(y_{f, \tvar},y_{g, \tvar})$ may be available with
    $y_{f, \tvar} = f_{u_\tvar} (v_\tvar| S_{t_{u_\tvar}, u_\tvar}) +
    \epsilon_\tvar/2$ and
    $y_{g, \tvar} = g_{u_\tvar} (v_\tvar| S_{t_{u_\tvar}, u_\tvar}) +
    \epsilon_\tvar/2$.
  \end{enumerate}
\end{enumerate}
The separate feedback case (3b) is relevant only
for applications (e.g., multiple surveys, etc.) where it is feasible.
Section~\ref{sec:separate_feedback} exploits this richer feedback
to improve performance. All feature-vectors $\ctx_{u_\tvar}$ are
chosen from set $\Ctxs$ of size $|\Ctxs|=m$, and we assume that the identity of the utility
function $h_q$ is determined uniquely by $\ctx_q$; hence, when clear
from context, we use $h_q$ to refer to $h_{\ctx_q}$.

We observe how two applications may be formalized in our
framework.  Vignette~\ref{vignette:active_learning} is further
explored in Appendix~\ref{sec:app_numerical}.

\begin{vignette}[Movie Recommendations]
\label{vignette:movie}
\normalfont Each function $h_q$ captures the preferences of a single
user $q \in [m]$, and the index $u_\tvar \in [m]$ reveals which user
has arrived at time step $\tvar$. The action $v_\tvar$ performed at
time $\tvar$ is the optimizer's recommended movie to user
$u_\tvar$. The feature vector $\ctx_{u_\tvar}$ contains user-specific
information, e.g., age range, favorite movies and genres, etc.  The
feedback gain $h_{u_\tvar}(v|A)$ is the enjoyment user $u_\tvar$ has
from watching movie $v$ having already watched the movies in set $A$.
\end{vignette}
\vspace{-0.5\baselineskip}
\begin{vignette}[Active Learning]
\label{vignette:active_learning}
\normalfont The optimizer chooses training points to be labeled for $m$ related tasks on the same dataset---for instance, classification, object detection, and captioning.
The function $h_q (A)$ is the test accuracy
of a predictor trained on set $A$ on the $q$-th task.
Choosing an action $v_\tvar$ is tantamount to choosing a training point to be labeled for task $u_\tvar \in [m]$.
\end{vignette}

In our quest to design low regret online item-selection strategies for
these problems (made precise in Section~\ref{sec:alpha_regret}), we
first study the robustness of the greedy procedure for the offline
optimization of Monotone Non-decreasing Normalized (MNN)
functions (see Appendix~\ref{sec:backgr-subm-superm}) in
Section~\ref{sec:robustness}. Then in Section~\ref{sec:alpha_regret},
we show that our proposed online procedure approximates the offline
greedy algorithm, leveraging Section~\ref{sec:robustness} to obtain
online guarantees.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Offline Algorithm Robustness}
\label{sec:robustness}

We consider the problem of cardinality-constrained optimization of a MNN objective $h: 2^V \to \R$:
\abovedisplayshortskip=.4ex\belowdisplayshortskip=.4ex\abovedisplayskip=.4ex\belowdisplayskip=.4ex
\begin{align}
\label{eq:mnn_offline_opt}
\max_{S \in 2^V} h(S): |S| \leq k
\end{align}
Let $S^\ast$ denote a set attaining the maximum in
Equation~\eqref{eq:mnn_offline_opt}. The most common approximation
algorithm for this problem
greedily~\citep{nemhauser1978analysis} maximizes the available marginal gain having
oracle access to $h$. In online settings, however,
we do not have this luxury. To help us analyze the online setting, therefore, we
consider a modified offline algorithm where the greedy choices might
be good only with respect to a set of additive ``slack'' variables
$r_j$, exploring the impact of this modification on approximation
quality for different classes of functions. Then in
Section~\ref{sec:alpha_regret} we develop an online algorithm that
emulates greedy in this way.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Greedy Selection Robustness}
\label{sec:greedy-select-robust}

We define an \textbf{approximate greedy} selection rule that, given
scalars $\{r_j\}_{j=1}^k$, chooses $v_j$ for each
$j \in [k]$ satisfying:
\begin{equation}
\label{eq:approximate_selection_rule}
  v_j \in \{v: h(v|S_{j-1}) \geq \max_{\tilde{v}} h(\tilde{v}|S_{j-1}) - r_j \},
\end{equation}
where $S_{j} = \{v_1, \ldots, v_j\}$ (with $S_0 = \varnothing$) and $S^\ast$ is the optimal set of size $k$.
\begin{restatable}{lemma}{lemmaone}
  \label{prop:robust_greedy_bp}
  Any output $S$ of the approximate greedy selection rule in
  Equation~\eqref{eq:approximate_selection_rule} admits the following
  guarantee for BP objectives (Def.~\ref{def:bp}) for
  Problem~\eqref{eq:mnn_offline_opt}:
  \[
    h(S) \geq \frac{1}{\kappa_{f}} \left[1 - e^{-(1 - \kappa^g) \kappa_{f}}\right] h(S^\ast) - \sum_{j=1}^k r_j
  ,\]
  where $\kappa_f, \kappa^g$ are as defined in Definitions~\ref{def:submod_curvature} and~\ref{def:supermod_curvature}.
\end{restatable}

A proof is in
Appendix~\ref{sec:appr-greedy-bp}. This result is a generalization
of~\citet[Theorem 3.7]{jeff_bp} which is recovered by setting
$\forall j, r_j = 0$.  This result is surprising because, with the
supermodular part of the BP function, poor early selections may
preclude the ability to exploit potential increasing returns from $g$
--- the curvature $\kappa^g$ is crucial for this. The result can also be
understood as a generalization of \citet{NIPS2017_krause}, which
studies the robustness of the greedy algorithm to errors in submodular
functions. In their case, however, they adapt the simple classical
greedy algorithm proof~\citep{nemhauser1978analysis}.  In
Appendix~\ref{sec:app_naive}, we provide an alternate proof using a
crude bound that incorporates the supermodular curvature but ignores
the submodular curvature, reminiscent of the argument
in~\citet{NIPS2017_krause}. However, the approximation ratio obtained
is much worse than that of~\citet{jeff_bp}.

Therefore, we study (in Appendix~\ref{sec:appr-greedy-bp}) the
robustness using the detailed analysis in~\citet{jeff_bp}.  This poses
a considerable challenge, since \citet{jeff_bp} (inspired by
\citet{conforti1984submodular}) formulate an intricately designed
series of linear programs to show that any selection that has as much
overlap with the optimal solution as the greedy algorithm must achieve
the desired approximation ratio. Here, the errors $r_j$ manifest as
perturbations to the constraints of the linear programs.  We then
perform a \textit{sensitivity analysis} of the linear programs to
argue that these perturbations to the constraints lead to a linear
perturbation to the optimal objective and do not cause it to
explode.

In the case where $h$ does not have a BP decomposition, we
offer the following result generalizing~\citet{bian2019guarantees}.
\begin{restatable}{lemma}{lemmatwo}
\label{lem:robust_greedy_sr}
      Any output $S$ of the approximate greedy selection rule in Equation~\eqref{eq:approximate_selection_rule} admits the following guarantee on objectives with submodularity ratio $\gamma$ and generalized curvature $\zeta$ (Definitions~\ref{def:submod_ratio} and \ref{def:gen_curvature}) for Problem~\eqref{eq:mnn_offline_opt}:
  \[
    h(S) \geq \frac{1}{\zeta}\big(1-e^{-\zeta \gamma}\big) h(S^\ast) - \sum_{j=1}^k r_j
  .\]
\end{restatable}%
A proof is given in Appendix~\ref{sec:appr-greedy-ws}. We see in
Section~\ref{sec:alpha_regret} that Lemmas~\ref{prop:robust_greedy_bp}
and \ref{lem:robust_greedy_sr} are key to the analysis of
Algorithm~\ref{alg:smucb}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \subsection{Robustness of Distorted Greedy Selection}
\subsection{Distorted BP Greedy Robustness}
\label{sec:sub_distorted_robustness}

In \citet{distorted_greedy}, the authors present a ``distorted'' version of the greedy algorithm,
which achieves a better greedy approximation ratio than~\citet{jeff_bp} for
Problem~\eqref{eq:mnn_offline_opt} with a BP objective.
Here, we study its robustness.

As in \citet{sviridenko2017optimal}, we define the modular lower
bound of the submodular function
$l_{1}(S) = \sum_{j \in S} f(j| V \backslash \{j\})$.  Also,
define the totally normalized submodular function as
$\fone(S) = f(S) - l_{1}(S)$.  Note that $\fone$ always has
curvature $\kappa_f = 1$ and also that $h (S) = f_{1}(S) + g (S) + l_{1}(S)$. 
We define the function $\pi_{j} (v | A)$ as:
\begin{equation}
\label{def:pi}
    \pi_{j} (v | A) = \left(1 - \frac{1}{k}\right)^{k - j - 1} f_{1} (v|A) + g(v|A) + l_{1}(v)
\end{equation}
In~\citet{distorted_greedy}, the
optimizer greedily maximizes the $\pi_{j}$ function at step $j$ rather than
the original BP gain.  In $\pi_{j}$, the submodular
part is down weighted relative to the supermodular part.  Intuitively,
this is helpful because the supermodular part is initially much
smaller than the submodular part, but ultimately dominates the sum.
Thus, it is in the optimizer's interest to focus on the supermodular
part early, rather than waiting until it becomes large.

We define the \textbf{approximate distorted greedy} selection rule as
follows. Given scalars $\{r_j\}_{j=1}^k$, in each step
$j \in \{1, \ldots, k\}$, the optimizer chooses an item $v_j$ that
satisfies
 \begin{equation}
\label{eq:approximate_selection_rule_distorted}
  v_j \in \{v: \pi_j(v|S_{j-1}) \geq \max_{\tilde{v}} \pi_j(\tilde{v}|S_{j-1}) - r_j \}.
\end{equation}
We present a robust version of~\citet{distorted_greedy}:
\begin{restatable}{lemma}{lemmathree}
\label{prop:robust_distorted_greedy}
   Any output $S$ of the approximate distorted greedy selection rule in Equation~\eqref{eq:approximate_selection_rule_distorted} admits the following guarantee for Problem~\eqref{eq:mnn_offline_opt} with a BP objective (Def.~\ref{def:bp}):
  \[
    h(S) \geq \min \left\{ 1 - \frac{\kappa_f}{e}, \; 1 -\kappa^g \right\} h(S^\ast) - \sum_{j=1}^k r_j
  ,\]
  where $\kappa_f, \kappa^g$ are as defined in Definitions~\ref{def:submod_curvature} and~\ref{def:supermod_curvature}.
\end{restatable}
This lemma is the key to the analysis of
Algorithm~\ref{alg:mnnucb_dist} in
Section~\ref{sec:separate_feedback}. We remark that the approximation
ratio above is different from~\citet{distorted_greedy}.
This is due to us fixing an error that we
noticed in their analysis, which caused the approximation ratio to
change from their
$\alpha = \min \left\{ 1 - \frac{\kappafk}{e}, \; 1 -\kappagk e^{(1 -
    \kappagk)} \right\}$ to ours above.  Details are in
Appendix~\ref{sec:app_error}.  Additionally, note that
\citet{sviridenko2017optimal} provided a $1-\frac{\kappa_f}{e}$ lower
bound for monotone submodular maximization and later on,
\citet{jeff_bp} obtained a $1-\kappa^g$ lower bound for monotone
supermodular maximization. Our approximation ratio in
Equation~\eqref{eq:bp_regret_strong}
is simply the minimum of these two quantities.  In
Appendix~\ref{sec:app_distorted}, we provide a heat map that compares
this approximation ratio to that of \citet{jeff_bp}, showing that it
is strictly greater for all $\kappa_f, \kappa^g$.  Once
their analysis is fixed, we adapt their argument to the more general
case that allows for errors $r_j$ at each stage to complete the
robust online proofs.\looseness-1


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{No-Regret Single Feedback}
\label{sec:alpha_regret}

In the previous section, we considered the robustness of the greedy
algorithm in the offline setting.  We now return to our interactive
problem from Section~\ref{sec:problem-formulation} and will show how
it reduces to the offline problem.

First, we fully define the notion of scaled regret mentioned in
Section~\ref{sec:introduction}.  The scaling is chosen to compare with
the appropriate offline algorithm for the relevant function class; it
is standard to consider scaled regret for NP-hard problems
\citep[e.g.,][]{NIPS2017_krause}. Recall our interactive setup from
Section~\ref{sec:problem-formulation}.  Let $\Tvar_q$ represent the
number of items selected for function $h_q$ by the final round,
$\Tvar$, so that $\sum_{q=1}^m \Tvar_q = \Tvar$.  The set
$S_{\Tvar_q,q}$ is the final selection for $h_q$ and
we set $S_q = S_{\Tvar_q,q}$ for notational simplicity.  Let
$ S_q^* \in \argmax_{|S| \le \Tvar_q} h_{q} (S) $ be a
maximizing payoff set for
$h_{q}$ with at most $\Tvar_q$ elements.
Inspired by~\citet{jeff_bp} and
with respect to the approximation ratio
obtained for the greedy baseline for BP functions,
we define
the regret metric $\mathcal{R}_{\text{BP}}(\Tvar)$ as follows:
\begin{align}
  \label{eq:bp_regret}
\mathcal{R}_{\text{BP}}(\Tvar) := \sum_{q=1}^m \frac{1}{\kappa_{q,f}} \left[1 - e^{-(1 - \kappa_q^g) \kappa_{q,f}} \right] h_q (S_q^\ast) - h_q(S_q).
\end{align}
From Lemma~\ref{prop:robust_greedy_bp},
we recognize that if our online algorithm is approximately greedy as in Equation~\eqref{eq:approximate_selection_rule},
then our regret will be bounded by the accumulation of the approximation errors $r_j$.
This observation bridges the gap between the online and offline settings.
Hence, our goal is to design an algorithm that satisfies this property.
Analogously, for functions with bounded submodularity ratio $\gamma_q$ (Definition~\ref{def:submod_ratio}) and generalized curvature $\zeta_q$ (Definition~\ref{def:gen_curvature}), we define:
\begin{equation}
\label{eq:SR_regret}
\mathcal{R}_{\text{WS}} (\Tvar) := \sum_{q=1}^m \frac{1}{\zeta_q} \left[1 - e^{-\zeta_q \gammak} \right] h_q (S_q^\ast) - h_q(S_q).
\end{equation}
If we knew all the functions $\{h_1 \ldots h_m\}$, we could select the
greedy item at each stage and achieve zero regret.  Define
$\Delta(\phi, S, v) = h_\phi (v|S)$ to encapsulate all our $m$ latent
objectives succinctly; we also further below use the notational shortcuts
$x_\tvar = (\phi_{u_\tvar}, S_{u_\tvar}, v_\tvar)$ and $\Delta(x_t)$. If we knew this
function, we would know $\{h_1 \ldots h_m\}$ as well. Thus, our task
is to design a procedure to estimate $\Delta(\phi, S, v)$ from data
such that the approximation errors $r_j$ reduce over time.

To make this possible, we must make additional assumptions on
$\Delta(\cdot)$.  To see why, consider what we can infer from an
observation without any additional assumptions.
In the BP case, for instance, the $q$-th BP gain function is
uniquely defined by $2^{|V|}$ function evaluations $h_q(v|S)$ for each
possible $(v, S)$.  If we observe $h_q(v|S)$ for some $(v, S)$, then
we can only make inferences about $f_q(v|A)$ and $g_q(v|A)$ for all $A \subseteq S$ or
$A \supseteq S$; since we can only choose item $v$ once during the
optimization for user $q$, this information is not useful practically.
This motivates the following assumption.\footnote{See
  \citep{berlinet2011reproducing} for a comprehensive treatment of
  RKHS and kernels.}%
\begin{assumption}
  \label{ass:rkhs}
  The $\Delta(\cdot)$ function lives in a Reproducing Kernel Hilbert
  Space (RKHS) associated with some kernel $\kernel$ and has bounded norm
  i.e., $\|\Delta\|_\kernel \leq B$.
\end{assumption}

The assumption ensures the outputs of the
$\Delta(\cdot)$ function vary smoothly with respect to the inputs and
is standard with GPCBs~\citep{seeger2008information,icml2010_srinivas,
  nips2011_krause, NIPS2017_krause}.  E.g., if two related movies are
watched by two similar users, they should provide similar
ratings. Thus, each query provides information about all
$m \cdot 2^{|V|}$ other possible queries to all functions, making
estimation feasible since the kernel
$\kernel((\phi_q, S, v), (\phi_{q'}, S', v'))$ measures similarity between
two inputs.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{MNN-UCB Algorithm}
\label{app:one_page_alg}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{algorithm}[t]
  \caption{MNN-UCB}
  \label{alg:smucb}
   \textbf{Input} set $V$, $\kernel$ kernel function
\begin{algorithmic}[1]
    \STATE Init $S_{q} \leftarrow \varnothing, V_{q} \leftarrow V, \; \forall q \in[m]$
    \STATE Init $X_0 \leftarrow \varnothing, G_1 \leftarrow \varnothing$
    \FOR{$\tvar$ $\in 1,2,3 \ldots \Tvar$}
    \STATE Observe $u_\tvar$ from environment.
    \IF{$\tvar = 1$}
      \STATE Choose $v_1 \in V_{u_\tvar}$ uniformly at random
    \ELSE
        \STATE Update $\mathbf{y}_{\tvar} \leftarrow\left[y_{1}, y_{2}, \ldots, y_{\tvar-1}\right]^{\top}$
	\STATE $\mu_\tvar,\sigma_\tvar = \textsc{MeanVarCalc}$ \\
        \STATE \; \; $\left(V_{u_\tvar}, \phi_{u_\tvar}, S_{u_\tvar}, G_\tvar, G_{\tvar-1}, x_{\tvar-1}, X_{\tvar-1}, \mathbf{y}_\tvar \right)$
        \STATE Select  $v_\tvar \leftarrow \operatorname{argmax}_{v \in V_{u_\tvar}} \mu_\tvar(v)+\beta_{\tvar} \sigma_\tvar (v)$
    \ENDIF \textcolor{blue}{\hspace{1ex} // $\beta_\tvar$ is defined in Eq.~\eqref{eq:beta}.}
    % x_t is formed from S_{u_t} *before* v_t is added, since \Delta(\phi, S, v) = h_\phi(v | S).
    \STATE Update $x_\tvar \leftarrow (\phi_{u_\tvar}, S_{u_\tvar}, v_\tvar)$, \; $S_{u_{\tvar}} \leftarrow S_{u_{\tvar}} \cup\left\{v_\tvar\right\}$, \;  $X_\tvar \leftarrow X_{\tvar-1} \cup \left\{x_\tvar\right\}, V_{u_\tvar} \gets V_{u_\tvar} \setminus \left\{v_\tvar\right\}$
    \STATE Obtain feedback $y_\tvar = \Delta(x_\tvar) + \epsilon_\tvar$, \;
    \STATE  \vspace{0.3em} \textcolor{blue}{// Decide whether to store new point.}
    \STATE $G_{\tvar+1}$ =  \textsc{\NystromSelect}$\left(\kernel, G_\tvar, x_\tvar \right) $
    \ENDFOR
  \end{algorithmic}
  \vspace{-.2\baselineskip}
\end{algorithm}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[t]
  \caption{\textsc{\NystromSelect} \cite{zenati22a}.}
  \label{alg:nystrom}
\textbf{Input:} $\kernel, G_\tvar, x_\tvar$;
\textbf{Locally stored variables:} List $L$  \\
\textbf{Hyperparams:} Regularization $\lambda$, Accuracy $\eta$, Budget $b$ \\[-1\baselineskip]
\begin{algorithmic}[1]
\STATE If first call, init $L$ to empty list.
\STATE Compute leverage score $\hat{\tau}_\tvar(\lambda, \eta)$ from Eq.~\eqref{eq:leverage_score}
\STATE With probability $\min(b \hat{\tau}_\tvar(\lambda, \eta), 1)$ include $x_\tvar$ in $G_{\tvar+1}$.
\STATE Append $\hat{\tau}_\tvar(\lambda, \eta)$ to $L$
\end{algorithmic}
\textbf{Result: } $G_{\tvar+1}$
\end{algorithm}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{algorithm}[t]
  \caption{\textsc{MeanVarCalc}.}
  \label{alg:mean_var}
  \textbf{Input:} $V_{u_\tvar}, \phi_{u_\tvar}, S_{u_\tvar}, G_\tvar, G_{\tvar-1}, x_{\tvar-1}, S, \mathbf{y}_\tvar$\\
\textbf{Locally stored variables: } $L_1, L_2, L_3$ \\
\textbf{Hyperparams:} Regularization $\lambda$

\begin{algorithmic}[1]
\STATE  \vspace{0.3em} \textcolor{blue}{// Update $\Kgginv, \Lambda_{\tvar}, \mathbf{\tilde{y}}_{\tvar}$.}
\IF{$|G_\tvar| = 1$}
\STATE Init $L_1, L_2, L_3$ each to empty lists
\STATE Init $\tildeyvect =  y_\tvar \kernel(x_{\tvar-1},x_{\tvar-1})$
\STATE Init $\; K_{G_\tvar G_\tvar}^{-1} = 1/\kernel(x_{\tvar-1},x_{\tvar-1})$
\STATE Init $\Lambda_\tvar = 1/[\kernel(x_{\tvar-1},x_{\tvar-1})^2 + \lambda \kernel(x_{\tvar-1},x_{\tvar-1})]$.
\ELSIF{$G_\tvar = G_{\tvar-1}$}
  \STATE Update $\Lambda_\tvar$ using Eq.~\eqref{eq:sherman_morrison_lambda}
\STATE $\tildeyvect = \tildeyvecold + y_\tvar \kernel_{G_\tvar}(x_{\tvar-1})$
\ELSE
% \STATE $x = G_{\tvar+1} \backslash G_\tvar$
\STATE Update $\Lambda_\tvar$ using Eq.~\eqref{eq:schur_lambda} with Schur complements.
\STATE Update $K_{G_{\tvar} G_{\tvar}}^{-1}$ using Eq.~\eqref{eq:schur_kinv} with Schur complements \textcolor{blue}{// here and line 11 use lists $L_1, L_2, L_3$}
\STATE $\tildeyvect = [\tildeyvecold + y_\tvar \kernel_{G_\tvar}(x_{\tvar-1}), K_S (x_{\tvar-1})^\top \mathbf{y}_\tvar]^\top$
\ENDIF
\STATE $z_\tvar \leftarrow (\phi_{u_\tvar}, S_{u_\tvar})$
\STATE Append $(\Kgginv, \Lambda_\tvar, \tildeyvect)$ to $L_1, L_2, L_3$ lists resp
\STATE  \vspace{0.3em} \textcolor{blue}{// Calculate mean and variance vectors.}
\FOR{$v \in V_{u_\tvar}$}
    \STATE $\mutildei(v) \leftarrow \kernel_{G_\tvar}((z_\tvar, v))^{\top}\ \Lambda_\tvar \tildeyvect$
    \STATE $\delta_\tvar (v) \leftarrow \kernel_{G_\tvar}((z_\tvar, v))^{\top} (\Lambda_\tvar - \lambda^{-1} \ \Kgginv) \kernel_{G_\tvar}((z_\tvar, v))$
    \STATE $\sigmatildei(v)^2 \leftarrow \lambda^{-1} \kernel((z_\tvar, v), (z_\tvar, v)) + \delta_\tvar (v)$
\ENDFOR
\end{algorithmic}
\textbf{Result: } $\{\mutildei(v)\}_{v \in V_{u_\tvar}},  \{\sigmatildei(v)\}_{v \in V_{u_\tvar}}$
\end{algorithm}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Algorithm~\ref{alg:smucb} is inspired by~\citep{NIPS2017_krause,
  zenati22a} based on Upper Confidence Bound (UCB) algorithms for
kernel bandits.  At time $\tvar \in [\Tvar]$, the optimizer has
available the noisy evaluations of the unknown $\Delta(\cdot)$
function in vector $\mathbf y_\tvar = (y_j)_{j=1}^{\tvar-1}$ for
corresponding inputs held in vector $X_\tvar = (x_j)_{j=1}^{\tvar-1}$
--- these are updated at the end of each iteration.  These
are used by the subroutine \textsc{MeanVarCalc} that, using GP kernel techniques
\citep{Valko2013FiniteTimeAO, icml2010_srinivas} and the
\Nystrom{} set of samples $G_t \subset X_t$, efficiently
computes estimates of the GP posterior distribution's conditional mean
and variance used for the UCB marginal gains
in the maximization (line~11 of Algorithm~\ref{alg:smucb}).

That is, the algorithm chooses the item $v_t$ that has the highest UCB
in line~$11$ where the parameter $\beta_\tvar$ controls the
algorithm's propensity towards either exploration or exploitation (see
Appendix~\ref{sec:remark-steps-beta_t}).  We use the notation
$\kernel_A (x) = [\kernel(x_1, x) \ldots \kernel(x_{|A|}, x)]$ to measure the similarity
between $x$ and every element in
$A = \{x_j\}_{j \in \{ 1, \dots, |A|\}}$. Hence,
$\kernel_{G_\tvar}((\phi_{u_\tvar}, S_{u_\tvar}, v))$ measures the
similarity of the input $(\phi_{u_\tvar}, S_{u_\tvar}, v)$ to the
historical data in \Nystrom{} set $G_\tvar$. Notation
$K_{AB} = [\kernel(x, x')]_{x \in A, x'\in B}$ contains the matrix of
pairwise kernel-similarities for elements in $A, B$ and
$K_{G_\tvar G_\tvar}$ is the covariance matrix of the historical data
$G_\tvar$. 
Below, we describe the details for the two subroutines used in 
Algorithm~\ref{alg:smucb}, 
for the readers who are interested in calculations for kernel updates,
and selection of informative points to improve computation via \Nystrom{} sampling.

\paragraph{Efficiency and \textsc{\NystromSelect}}
In prior submodular bandits work~\citep{NIPS2017_krause}, each
iteration $\tvar \in [\Tvar]$ needs to invert a $\tvar \times \tvar$
matrix since \emph{all} historical data $X_t$ is used when calculating
the conditional means and variances.
Even if online matrix-inverse techniques are used, the
run-time becomes $O(\Tvar^3)$, which is impractical.
We use \Nystrom{} sampling to mitigate this and only use a selected subset
$G_\tvar \subset X_t$ of historical data to compute
$\mu_\tvar(v), \sigma_\tvar(v)$ for all $v \in V_{u_\tvar}$
\citep{zenati22a}.  \Nystrom{} sampling chooses the points that are
most useful for prediction. To define this precisely, we
introduce a bit of notation. Define $G' = G_\tvar \cup \{x_\tvar\}$.
Define the estimated
leverage score $\hat{\tau}_\tvar(\lambda, \eta, x)$ as:
\begin{equation}
  \label{eq:leverage_score}%
%    \hat{\tau}_\tvar =
  \frac{1 + \eta}{\lambda} 
  \left[\kernel(x, x) - \tilde{\kernel}_{G'}(x)^\top
  (\tilde{K}_{G'G'} + \lambda I)^{-1} \tilde{\kernel}_{G'}(x) \right]
\end{equation}
Define:
\[
M_\tvar = 
\begin{bmatrix}
  \text{diag}\left([\min(\hat{\tau}_j(\lambda, \eta, x_j), 1)]_{x_j \in G_\tvar} 
\right) & \textbf{0} \\
\textbf{0} & 1
\end{bmatrix}
\] 
It is the diagonal scaling matrix of (clipped) leverage scores of past selected points 
with an extra entry with value $1$ for the hypothetical new point $x_t$.
In Equation~\eqref{eq:leverage_score} above, we define
$\tilde{\kernel}_{G'}(x_\tvar) = M_\tvar^\top \kernel_{G'}(x_\tvar)$ and
$\tilde{K}_{G'G'} = M_\tvar^\top K_{G' G'} M_\tvar$.
The point $x_\tvar$ is included into the \Nystrom{} set $G_{\tvar+1}$
with probability proportional to $\hat{\tau}_\tvar(\lambda, \eta, x_\tvar)$
(line 3 of Algorithm~\ref{alg:nystrom}). To understand why this is
reasonable, note that $\hat{\tau}_\tvar(\lambda, \eta, x_\tvar)$ is shown to
estimate the ridge leverage score (RLS) of a point well
\citep{calandriello17a, calandriello2019}. The RLS measures
intuitively how correlated the new point is to previous points; if it
is highly correlated, it will be sampled with low probability, but if
it is orthogonal, it will be sampled with high probability. 
This procedure improves the runtime of Algorithm~\ref{alg:smucb} to 
$O(\Tvar |G_T|^2)$, where $|G_T|$ is the number of selected \Nystrom{} points 
until the final timestep. 
A discussion on setting the hyperparameters $\eta$ and $b$,
controlling the tradeoff between regret and computation,
is given in Appendix~\ref{sec:remark-hyperparameters}.


\paragraph{\textsc{MeanVarCalc}}
This subroutine calculates the posterior mean and variance for $\Delta(\cdot)$ using Gaussian process posterior calculations after projecting on the \Nystrom{} points $G_\tvar$. We define the intermediate quantity
\[
\Lambda_\tvar = \left(K_{G_\tvar S_\tvar} K_{S_\tvar G_\tvar} + \lambda K_{G_\tvar G_\tvar} \right)^{-1},
\]
which is useful in these updates.
Note that the algorithms store and track the local variables $\Kgginv, \Lambda_{\tvar}, \mathbf{\tilde{y}}_{\tvar}$ across time steps.
The inverses $\Kgginv$ and $\Lambda_{\tvar}$ must be updated incrementally as the time steps continue.
For $\Lambda_{\tvar}$, if $G_\tvar$ does not change, it does this using the Sherman-Morrison formula:
  \begin{equation}
    \label{eq:sherman_morrison_lambda}
  \Lambda_{\tvar} = \Lambda_{\tvar-1} - \frac{\Lambda_{\tvar-1} \kernel_{G_\tvar} (x_\tvar) \kernel_{G_\tvar} (x_\tvar)^\top \Lambda_{\tvar-1}}{1 + \kernel_{G_\tvar} (x_\tvar)^\top \Lambda_{\tvar-1} \kernel_{G_\tvar} (x_\tvar)}.
  \end{equation}
This update takes $O(|G_\tvar|^2)$ time.
In the case that $G_\tvar$ changes, we can write the expression for $\Lambda_{\tvar+1}$ as follows. In the below, let $S$ denote the historical inputs preceding $x_\tvar$, and let $a = \kernel_{G_\tvar} (x_\tvar)$, $b = \kernel_{S} (x_\tvar)$ and $c = \kernel(x_\tvar, x_\tvar)$.
\begin{equation}
\label{eq:schur_lambda}
\Lambda_{\tvar+1} = \begin{bmatrix}
  K_{G_\tvar S} K_{S G_\tvar} + a a^\top + \lambda K_{G_\tvar G_\tvar} & K_{G_\tvar S} b + (c + \lambda) a \\
  b^\top K_{S G_\tvar} + (c + \lambda) a^\top & b^\top b + c^2 + \lambda c
\end{bmatrix}^{-1}.
\end{equation}
We can use the Schur complement block-matrix inverse identity to evaluate the above
which takes $O(\tvar |G_\tvar|)$ time.
Similarly, for $K_{G_{\tvar} G_{\tvar}}^{-1}$, we write it in block-matrix form as:
\begin{equation}
\label{eq:schur_kinv}
K_{G_{\tvar} G_{\tvar}}^{-1} = \begin{bmatrix}
  K_{G_{\tvar-1} G_{\tvar-1}} & K_{G_{\tvar-1}}(x_\tvar) \\
  K_{G_{\tvar-1}}(x_\tvar)^\top & \kernel(x_\tvar, x_\tvar)
\end{bmatrix}^{-1},
\end{equation}
computable using Schur complements in $O(|G_\tvar|^2)$ time.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Theoretical guarantee}
\label{sec:theor-guar}

For a given set
$X_T = \{x_1, \ldots, x_T\}$, all our bounds are stated in terms of the
effective dimension of the matrix $K_T = K_{X_T X_T}$, as described in
\cite{hastie2009elements}:
\begin{definition}
    \label{def:deff}
    $\deff (\lambda, \Tvar) = \text{Tr} (K_{T} (K_{T} + \lambda I)^{-1})$
  \end{definition}

Intuitively, the effective dimension is a measure of the number of
dimensions in the feature space that are needed to capture data variations.
Having a smaller effective dimension enables learning
the unknown $h_q$ with fewer samples.  If the empirical kernel matrix
$K_T$ has eigenvalues $(\lambda_1 \ldots \lambda_\Tvar)$, the
effective dimension can equivalently be written as
$\deff (\lambda, \Tvar) = \sum_{\tvar = 1}^\Tvar \frac{\lambda_\tvar}{\lambda_\tvar + \lambda}.$
Thus, if the eigenvalues decay quickly, the denominator will dominate for most summands and $\deff$ will be small.
If the kernel is finite dimensional, with dimension $s$, then only the first $s$ terms in the summation will be nonzero and $\deff \leq s$.
Having a small effective dimension makes the problem of learning the unknown objective function easier and hence improves our regret guarantee.
This quantity is inspired by classical work in statistics \citep{hastie2009elements, NIPS2002_tong}.

It is instructive to bound $\deff$ for different kernels.
We relate $\deff$ to the information gain $\tilde{\gamma}(\lambda,\Tvar)$ \citep{icml2010_srinivas}.
\begin{align*}
  \deff (\lambda, \Tvar) = \sum_{\tvar = 1}^\Tvar \frac{\frac{\lambda_\tvar}{\lambda}}{\frac{\lambda_\tvar}{\lambda} + 1} \leq \sum_{\tvar=1}^\Tvar \log\left(1 + \frac{\lambda_\tvar}{\lambda}\right) = \tilde{\gamma}(\lambda,\Tvar).
\end{align*}
Here, we used the inequality $\frac{x}{x+1} \leq \log(1+x)$ for $x > -1$.
\citet{icml2010_srinivas} provides bounds on $\tilde{\gamma}(\lambda,\Tvar)$ for various kernels.
For the (most popular) Gaussian kernel with dimension $d$,
they show that $\tilde{\gamma}(\lambda,\Tvar) \leq \log(\Tvar)^{d+1} $
which holds under the assumption that the eigenvalues $\lambda_\tvar$ are square summable.
We plot the exact $\deff (\lambda, \Tvar)$ for the Gaussian kernel in Figure~\ref{fig:deff_b} thereby verifying this
bound empirically.
For the linear kernel with dimension $d$, we have  $\tilde{\gamma}(\lambda,\Tvar) \leq d \log(\Tvar)$.
In our experimental results (see Appendix~\ref{sec:app_numerical}), we use a composite over three constituent kernels
and Theorems~2 and 3 of \citet{nips2011_krause} bound $\deff$ for the product or sums of kernels,
when $\tilde{\gamma}(\lambda, \Tvar)$ is bounded for each constituent.
This all ensures our regret bounds are sublinear in practice.

Now, we are ready to state our main result.
\begin{theorem}
  \label{thm:bp_regret_bound}
  Let Assumption~\ref{ass:rkhs} hold and assume that $\epsilon_\tvar$ are i.i.d.\ centered sub-Gaussian (i.e., light tailed) noise.
  Then MNN-UCB (Algorithm~\ref{alg:smucb}) obtains the following regret:
  \begin{enumerate}[label=(\alph*),leftmargin=*,labelindent=0em,partopsep=-8pt,topsep=-8pt,itemsep=-2pt]
    \item When all $h_q$ are BP functions, we have that
    $\mathbb{E} [\mathcal{R}_{\text{BP}}(\Tvar)] \leq O\left(\sqrt{\Tvar} \left(B \sqrt{\lambda \deff} + \deff\right) \right) $
    \item When all $h_q$ are WS functions, we have that
    $\mathbb{E} [\mathcal{R}_{\text{WS}}(\Tvar)] \leq O\left(\sqrt{\Tvar} \left(B \sqrt{\lambda \deff} + \deff\right) \right) $
  \end{enumerate}
\end{theorem}

Below, we prove case (a); for the proof of case (b), refer to Appendix~\ref{sec:app_smucb}.
We see that Lemma~\ref{prop:robust_greedy_bp} and our algorithm design 
enable us to relate our notion of regret with the pointwise notion of regret from \citet{zenati22a}.
\begin{proof}
  We define $R_\tvar = \sum_{j=1}^\tvar r_j$ where
  $r_\tvar = \sup_{v \in V} h_{u_\tvar} (v | S_{t_{u_\tvar},u_\tvar}) -
  h_{u_\tvar} (v_{\tvar} | S_{t_{u_\tvar},u_\tvar})$.  Notice
  $R_\tvar$ is different from $\mathcal{R}_{\text{BP}}(\tvar)$. From
  Lemma~\ref{prop:robust_greedy_bp} for all $q$, and with
  $\mathcal{R}_{\text{BP}}(\Tvar)$ defined in
  Equation~\eqref{eq:bp_regret}, we have
  $\mathcal{R}_{\text{BP}}(\Tvar) \leq \sum_{\tvar = 1}^\Tvar r_\tvar
  = R_\Tvar$.
  We model the problem of the present work as a contextual
bandit problem in the vein of \cite{zenati22a}. Here, the
  context in round $\tvar$ is
  $z_\tvar = (\phi_{u_\tvar}, S_{t_{u_\tvar},u_\tvar})$. We next invoke
  Theorem~4.1 in \cite{zenati22a} to complete our result. Further details are in Appendix~\ref{sec:app_smucb}.
\end{proof}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{No-Regret Separate Feedback}
\label{sec:separate_feedback}

In the previous section, for BP functions, 
we obtained sublinear $\alpha$-regret with respect to the offline greedy baseline
under the \emph{monolithic feedback} setting from 
Section~\ref{sec:problem-formulation}.
In this section, we study whether obtaining \emph{separate feedback} for
the submodular and supermodular parts of the BP function can enable
us to provide a stronger guarantee. 
Towards this end, we make the following assumption:
\begin{assumption}
  \label{ass:separate}
For any $(v,A)$, the optimizer has access to two oracles that provide it with separate feedback for the submodular 
$f_q(v|A)$ and supermodular $g_q(v|A)$ parts. \label{assump:1.2}
\end{assumption}
This would be satisfied 
for applications where we have more fine-grained feedback than just a single reward. 
For movie recommendations, we could ask users questions like 
``why did you like this movie?'' or 
``what would your rating for this movie have likely looked like if you had not watched X?''.
Alternatively, we could 
estimate this from other similar users who had not watched ``X''---this
is likely to be what would be done in practice when a large user base is available. 
For training subset selection, 
for any chosen point, we could look at the density of nearby points from the opposite class to 
deduce whether the benefit is coming from complementary selection of
points---see Figure~\ref{fig:active_learn} for an illustration.

In addition to the above, we need another technical assumption.
In order to state this, we first introduce some notation.
As in Section~\ref{sec:robustness}, for each function $h_q$ we use
the modular lower bound $l_{q,1}(A)$ and the totally normalized
submodular function $\fkone$, and we define $\pi_{j, \phi_q} (v | A)$
as:
\begin{align}
  \left(1 - \frac{1}{\Tvar_q}\right)^{\Tvar_q - j - 1} f_{q,1} (v|A) + g_q(v|A) + l_{q,1}(v).
\end{align}
We use $\pi_{j, q} = \pi_{j, \phi_q}$ interchangeably for readability.
Now, we additionally assume the following for each $q \in [m]$.
\begin{assumption}
    \label{ass:modular}
    \begin{mylist}[label=(\alph*), ref={Assumption~(3\alph*)}]
      \item The modular lower bound $l_{q,1} (\cdot)$ is known by the optimizer. \label{assump:1.1}
      \item The number of items for each user $T_q$ is known by the optimizer. \label{assump:1.3}
    \end{mylist}
\end{assumption}

For \ref{assump:1.1}, note that the modular lower bound is the summation, 
over items, of the minimum possible submodular gain of 
selecting each item. 
For the case of movie recommendation, 
there is likely to be some marginal enjoyment from watching a movie, 
even if all other movies in the set have already been watched;
this would of course depend on the overall size of the ground set. 
We could imagine that domain knowledge could indicate what 
these ``least gain'' quantities would look like---if
we are unsure, we can always choose a conservative estimate and the bound would degrade smoothly. 
Note also that the modular lower bound is defined by $|V|$ function evaluations, 
whereas the submodular function is defined by $2^{|V|}$. 
Hence, the submodular $f_q$ is still mostly unknown,
and knowing $l_{q,1}$ is a much 
weaker assumption than the full knowledge assumed in the offline setting.
For~\ref{assump:1.3}, if $T_q$ is not known beforehand,
``guess and double'' techniques
(Appendix~\ref{sec:remarks-guess-double}) can be used, the effects of
which result in a bounded additive term.

Given the richer feedback model,
we provide sublinear regret guarantees for
the stronger notion of regret for BP functions with respect to the
\textbf{distorted greedy} baseline (Section~\ref{sec:sub_distorted_robustness}). 
This is defined as:
\begin{equation}
  \label{eq:bp_regret_strong}
  \begin{split}
  % \mathcal{R}_{\text{BP}, 2} (\Tvar) :=
  & \mathcal{R}_{\text{BP}, 2} (\Tvar) :=  \\
  & \sum_{q=1}^m \min \left\{ 1 - \frac{\kappafk}{e}, \; 1 -\kappagk \right\} h_q (S_q^\ast) - h_q(S_q).
\end{split}
\end{equation}
For applications where \ref{assump:1.1} is not satisfied, we
additionally provide an alternate result in
Appendix~\ref{sec:analys-with-refass} without this assumption. 
Here, the $\alpha$ is
slightly reduced to
$\min \left\{ 1 - \frac{1}{e}, \; 1 -\kappagk \right\}$.  However, the
heat map in Figure~\ref{fig:contour} illustrates the bounds are
still better than the vanilla greedy $\alpha$ from~\cite{jeff_bp} for
most choices of $\kappafk, \kappagk$.
And in cases where Assumption~\ref{ass:separate} does not hold,
we can default to the results from Section~\ref{sec:theor-guar}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Algorithm and Theoretical Guarantee}
\label{sec:textscmnn-ucb-separ}
\label{sec:perf-guar-bp}

We present below our modified algorithm for the case 
of richer feedback.

\begin{algorithm}[tbh]
  \caption{MNN-UCB-Separate (modified Algorithm~\ref{alg:smucb})}
  \label{alg:mnnucb_dist}
  Line 14 of Algorithm~\ref{alg:smucb} replaced with the following:\\[-1\baselineskip]
  \begin{algorithmic}[1]
      \STATE Calculate distortion $D_\tvar \gets (1 - \frac{1}{\Tvar_{u_\tvar}})^{\Tvar_{u_\tvar} - |S_{u_\tvar}| - 1}$.
      \STATE Obtain submodular feedback $y_{f, \tvar} = f_{u_\tvar} (v_{\tvar}| S_{u_\tvar}) + \epsilon_{f,\tvar}/2$ and supermodular feedback $y_{g, \tvar} = g_{u_\tvar} (v_{\tvar}| S_{u_\tvar}) + \epsilon_{g,\tvar}/2$.
      \STATE Apply distortion to obtain overall feedback $y_\tvar = D_\tvar y_{f, \tvar} + y_{g, \tvar} + (1 - D_\tvar) l_{u_\tvar,1}(v_\tvar)$.
    \end{algorithmic}
    \vspace{-0.2\baselineskip}
  \end{algorithm}
  \vspace{-0.2\baselineskip}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
Algorithm~\ref{alg:mnnucb_dist} is quite similar to
Algorithm~\ref{alg:smucb} --- line $14$ of Algorithm~\ref{alg:smucb}
is modified to the three steps of
Algorithm~\ref{alg:mnnucb_dist}. That is, the feedback, now obtained
separately as $y_{f,\tvar}$ for the submodular and as $y_{g,\tvar}$
for the supermodular part, is aggregated in line $3$ of
Algorithm~\ref{alg:mnnucb_dist} as per $\pi_{j, \phi_q}$.
% We next state
Our guarantee for Algorithm~\ref{alg:mnnucb_dist} is the following:
\begin{theorem}
  \label{thm:bp_regret_bound_dist_strong}
  Let Assumptions~\ref{ass:rkhs}, \ref{ass:separate}, and
  \ref{ass:modular} hold and assume that $\epsilon_\tvar$ are i.i.d.\ centered sub-Gaussian noise.
  Then, when all $h_q$ are BP functions, Algorithm~\ref{alg:mnnucb_dist} yields
\[
  \mathbb{E}[\mathcal{R}_{\text{BP}, 2} (\Tvar)] \leq 
  O\left(\sqrt{\Tvar} \left(B \sqrt{\lambda \deff} + \deff\right) \right)
\]
\end{theorem}
The proof follows along similar lines as that of Theorem~\ref{thm:bp_regret_bound}
and is included in Appendix~\ref{sec:separate_proof}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
\section{Numerical Experiments}
\label{sec:numerical}
From MovieLens, we obtain a ratings matrix
$M \in \mathbb{R}^{900 \times 1600}$, where $M_{i,j}$ is the
rating of the $i^\text{th}$ user for the $j^\text{th}$
movie. Using this dataset, we instantiate an interactive BP
maximization problem, as formulated in Vignette~\ref{vignette:movie}.
\begin{figure}[t] 
\centering
\includegraphics[width=0.8\columnwidth]
{figs/baseline_plot_final_ill_trimmed.pdf}
\caption{Algorithm~\ref{alg:smucb} (magenta, green) and
  Algorithm~\ref{alg:mnnucb_dist} (gold) applied to the MovieLens dataset. The
  highlighted region shows the standard deviation over $10$ random
  trials.}
\label{fig:movie_performance}
\end{figure}
We cluster the users into $m = 10$ groups using the $k$-means
algorithm and design a BP objective for each user-group. The
objective for the $q^\text{th}$ group is decomposed as
$h_q(A) = \sum_{v \in A} m_q(v) + \lambda_1 f_q(A) + \lambda_2
g_q(A)$, where the modular part $m_q(v)$ is the average rating for
movie $v$ amongst all users in group $q$. The concave-over-modular
submodular part encourages the recommender to maintain a balance
across genres in chosen suggestions.  By contrast, the supermodular
function is designed to encourage the optimizer to exploit
complementarities within genres. 
The constants $\lambda_1, \lambda_2$
are chosen so that the supermodular part slightly dominates the
submodular part, since previous work already studies primarily
submodular functions. 
Further details are provided in Appendix~\ref{sec:app_numerical}. 
Note that these functions are hand-crafted for illustration purposes
and are unknown to the optimizer in all cases.

In Figure~\ref{fig:movie_performance}, the red curve corresponds
to a naive baseline where movies are recommended at random.
The green curve corresponds to the algorithm from 
\cite{NIPS2017_krause}, where the supermodular part is ignored.
The magenta curve corresponds to Algorithm~\ref{alg:smucb} from
Section~\ref{sec:alpha_regret} in the case where monolithic
feedback is provided to the optimizer. The gold curve corresponds
to Algorithm~\ref{alg:mnnucb_dist} when separate feedback is provided
to the optimizer.

Since both the gold and magenta curves are significantly better than the green, we see 
the pitfall of modeling 
a BP problem as purely submodular.
The improved performance of the gold curve over the magenta 
shows the impact of the stronger feedback in 
Assumptions~\ref{ass:separate}, \ref{ass:modular} on performance.
This corroborates that the regret 
guarantee in Theorem~\ref{thm:bp_regret_bound_dist_strong} 
is stronger than that from Theorem~\ref{thm:bp_regret_bound}.
Active learning experiments are given in Appendix~\ref{sec:active-learning-1}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
\section{Discussion}
 
\label{sec:discussion}

In this paper, we presented algorithms for 
efficient online optimization of certain non-submodular functions,
which enables better modeling in some applications. 
We considered two different feedback
models and provided variants of optimistic kernel-bandit algorithms
that achieve sublinear regret. 
Along the way, we studied the robustness of the greedy algorithm
for these function classes in Section~\ref{sec:robustness}, which
is of independent interest.

\paragraph{Limitations and Future Work.} 
A limitation of Gaussian Process based methods is that the model updates
at each stage are computationally expensive, making them difficult to use
in practice. While we made headway towards addressing this problem, our time
complexity of $O(T |G_T|^2)$ may still be prohibitive for some applications 
of optimizing submodular or beyond submodular functions.
In these cases, other prediction and uncertainty quantification techniques,
such as the bootstrap, may be used.
Even simpler heuristic approaches may be employed that balance between areas of
the input space
that the learned model thinks are promising, 
and those that are underexplored. 
Our theory suggests that such methods are likely to work well.

In general, a decomposition of the
utility function $h$ could take other forms, such as a quotient of two
submodular functions, or as a product of a submodular and supermodular
function.  The additive BP form is emphasized due to its analytical
tractability, our ability to understand its bounds, and its greater
expressivity, but the future holds no limit regarding possible
decomposable $h$s that are viable in the separate feedback setting
that we introduced. A theoretical artifact of the curvature-based
bounds for non-modular functions is that they are independent of the
relative magnitudes of $f_q$ and $g_q$, which are important in experiments. It
would be useful to develop guarantees that incorporate this.

\paragraph{Applications and Social Impact.} 
In Appendix~\ref{sec:app_applications}, we compare our proposed approach
with others from the literature for the motivating applications of 
recommendation systems and active learning. 
We include some illustrative examples, which show the utility
of modeling these problems as BP instead of purely submodular.

For recommendation systems,
we conjecture that explicitly modeling the diminishing/increasing returns
of utilities could help alleviate the primary problem of state-of-the-art 
systems today---that they are more likely to show addictive and harmful 
content in order to keep users glued onto the service. For active learning, 
we conjecture that considering utilities beyond submodular is likely to provide us with more flexibility in expressing and balancing our multiple goals when choosing a subset of datapoints for training.



\paragraph*{Acknowledgements}

AN would like to thank Mitas Ray for valuable early conversations
that led to the inception of this project. In addition, we would
like to thank the anonymous reviewers for their helpful feedback.
While working on this project, 
AN was supported by the Amazon Hub Fellowship at the University of Washington.
This work was also supported 
in part by NSF TRIPODS II-DMS 20231660, NSF CCF 2212261, 
NSF CCF 2007036, NSF AF 2312775, NSF IIS-2106937, NSF IIS-2148367.


\bibliography{references}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% APPENDIX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\appendix
\newpage
\onecolumn

\section{Table of Notation}

\begin{table}[h]
  \centering
  \begin{tabular}{c l}
  \toprule
  \textbf{Notation} & \textbf{Description} \\
  \midrule
  $V$ & Ground set of items \\
  $m$ & Number of set functions \\
  $h_q$ & $q$-th set function, $q \in [m]$ \\
  $u_t$ & Index of arrived function at time $t$ \\
  $\phi_{u_t}$ & Context vector for function $h_{u_t}$ at time $t$ \\
  $v_t$ & Item selected at time $t$ \\
  $S_{k, q}$ & Items selected for function $h_q$ up to time $k$ \\
  $y_t$ & Noisy marginal gain feedback at time $t$ \\
  $y_{f,t}, y_{g,t}$ & Separate submodular and supermodular feedback at time $t$ \\
  $S_q^*$ & Optimal set for function $h_q$ \\
  $T_q$ & Number of items selected for function $h_q$ by time $T$ \\
  $\kappa_f, \kappa^g$ & Submodular and supermodular curvatures \\
  $\gamma, \zeta$ & Submodularity ratio and generalized curvature \\
  $\mathcal{R}_{\text{BP}}(T), \mathcal{R}_{\text{WS}}(T)$ & Regret for BP and WS functions \\
  $\mathcal{R}_{\text{BP},2}(T)$ & Regret for BP functions with separate feedback \\
  $\Delta(\phi, S, v)$ & Marginal gain of adding $v$ to $S$ for context $\phi$ \\
  $\mathcal{K}$ & Reproducing kernel Hilbert space (RKHS) \\
  $B$ & Bound on RKHS norm of $\Delta$ \\
  $G_t$ & Nyström set at time $t$ \\
  $\beta_t$ & Exploration-exploitation tradeoff parameter \\
  $d_{\text{eff}}(\lambda, T)$ & Effective dimension \\
  $l_1, l_2$ & Modular lower bounds for $f$ and $g$ \\
  $f_1, g_1$ & Totally normalized $f$ and $g$ \\
  $\pi_j(v|A)$ & Distorted marginal gain for selecting $v$ given $A$ at step $j$ \\
  \bottomrule
  \end{tabular}
  \caption{Table of key notation used in the paper.}
  \label{tab:notation}
\end{table}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Background on Submodularity, Supermodularity and Curvatures}
\label{sec:backgr-subm-superm}

A set function $h: 2^V \to \R$ is a function that maps any subset of a
finite ground set $V$ of size $|V|=n$ to the reals. There are many
possible set functions, and arbitrary set functions are impossible to
optimize with any quality assurance guarantee without an exponential
cost. As an example, consider a function $h$ such that $h(A) = a > b$
for some set $A \subseteq V$ and $a,b \in \R$ and $h(B) = b > 0$ for
all $B \neq A$. Then any algorithm that does not search over all
$2^{|V|}$ subsets can miss set $A$ and the approximation ratio $a/b$
can be unboundedly large. We are therefore interested in set functions
that have useful and widely applicable structural properties such as the
class of submodular and supermodular functions.

A set function $f: 2^V \to \R$ is said to be monotone non-decreasing
if $f(A \cup\{v\}) \geq f(A)$ for all $A \subseteq V, v \in V$. It is
normalized if $f(\emptyset) = 0$.  For convenience, we refer to the
collection of {\bf Monotone Non-decreasing Normalized} set functions as {\bf MNN}
functions. We use the gain notation $f(v|S)=f(S\cup \{v\})-f(S)$ to denote the {\bf marginal
gain} of adding element $v$ to the set $S$.

A set function $f$ defined over the ground set $V$ is called
{\bf submodular} if for all $A \subseteq B \subseteq V$ and any element
$v \notin B$ we have
$ f(A \cup\{v\})-f(A) \geq f(B \cup\{v\})-f(B)$. A function
$g: 2^V \to \R$ is said to be {\bf supermodular} if ${-g}$ is
submodular --- $g$ has the property of \emph{increasing returns} where
the presence of an item can only enhance the utility of selecting
another item. The class of functions defined below is the primary
focus of our paper.

\begin{definition}[BP Function]
\label{def:bp}
  A utility function $h$ is said to be BP if it admits the decomposition $h = f + g$, where  $f$ is submodular, $g$ is supermodular, and both functions are also MNN.
\end{definition}

Next, we introduce the notion of curvature for submodular and
supermodular functions. This will enable us to understand the
assumptions required to obtain approximation bounds for offline BP
functions, as stated in~\cite{jeff_bp}.

\begin{definition}[Submodular curvature]
  \label{def:submod_curvature}
Denote the curvature for submodular $f$ as $
\kappa_{f} = 1 - \min_{v \in V} \frac{f(v| V \setminus \{v\})}{f(v)}
.$
\end{definition}

\begin{definition}[Supermodular curvature]
  \label{def:supermod_curvature}
Denote the curvature for supermodular $g$ as: $
  \kappa^g = 1 - \min_{v \in V} \frac{g(v)}{g(v| V \setminus \{v\})}
.$
\end{definition}
These quantities are contained in $[0,1]$ and measure how far the functions are from being modular: if a curvature is zero, the function is modular. Important for practical applications, given the function, these can be calculated in linear time in $|V|$.
\citet{jeff_bp} analyzed the greedy algorithm for the
cardinality-constrained BP maximization problem and provided a
$\frac{1}{\kappa_{f}} \left[1 - e^{-(1 - \kappa^g) \kappa_{f}}
\right]$ approximation ratio for this problem.  They also showed that
not all monotone non-decreasing set functions admit a BP
decomposition. However, in cases where such a decomposition is
available, one can easily compute the curvature of submodular and
supermodular terms and compute the bound.

Since not all MNN functions are representable as BP functions, we also study arbitrary MNN functions in terms of how far they are from being submodular.
\begin{definition}[Submodularity ratio, \citep{bian2019guarantees, das2011submodular,JMLR:v19:16-534}]
\label{def:submod_ratio}
The submodularity ratio of a non-negative set function $h(\cdot)$ is the largest scalar $\gamma$ such that
$
\sum_{v \in S \backslash A} h(v|A) \geq \gamma h(S|A), \forall S, A \subseteq V.
$
\end{definition}

The submodularity ratio measures to what extent $h(\cdot)$ has submodular properties. For a non-decreasing function $h(\cdot)$, it holds that $\gamma \in[0,1]$ always, and $h(\cdot)$ is submodular if and only if $\gamma=1$.

\begin{definition}[Generalized curvature, \citep{bian2019guarantees}]
\label{def:gen_curvature}
The generalized curvature of a non-negative function $h(\cdot)$ is the smallest scalar $\zeta$ such that $\forall S, A \subseteq V, v \in A \backslash S$,
$
 h(v| (A \backslash\{v\}) \cup S) \geq (1-\zeta) h(v| A \backslash\{v\}).
$
\end{definition}
Note that unlike the notions of submodular and supermodular curvature, the submodularity ratio and generalized curvature parameters are information theoretically hard to compute in general~\citep{jeff_bp}. We refer to the MNN set functions with bounded submodularity ratio $\gamma$ and generalized curvature $\zeta$ as \textbf{weakly submodular} (WS).
\citet{bian2019guarantees} analyzed the greedy algorithm for maximizing such functions subject to a cardinality constraint and obtained a $\frac{1}{\zeta}\big(1-e^{-\zeta \gamma}\big)$ approximation ratio for this problem.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Other Related Work}
\label{app:related_work}

\paragraph{Submodular maximization with bounded curvature.} \citet{nemhauser1978analysis} studied the performance of the greedy algorithm for maximizing a monotone non-decreasing submodular set function subject to a cardinality constraint and provided a $1-\frac{1}{e}$ approximation ratio for this problem. While \citet{nemhauser1978best} showed that the $1-\frac{1}{e}$ factor cannot be improved under polynomial number of function value queries, the performance of the greedy algorithm is usually closer to the optimum in practice. In order to theoretically quantify this phenomenon, \citet{conforti1984submodular} introduced the notion of \emph{curvature} $\kappa\in [0,1]$ for submodular functions---this is defined in Section~\ref{sec:backgr-subm-superm}. The constant $\kappa$ measures how far the function is from being modular. The case $\kappa=0$ corresponds to modular functions and larger $\kappa$ indicates that the function is more curved. \citet{conforti1984submodular} showed the greedy algorithm applied to monotone non-decreasing submodular maximization subject to a cardinality constraint has a $\frac{1}{\kappa}(1-e^{-\kappa})$ approximation ratio. Therefore, for general submodular functions ($\kappa=1$), the same $1-\frac{1}{e}$ approximation ratio is obtained. However, if $\kappa<1$, $\frac{1}{\kappa}(1-e^{-\kappa})>1-\frac{1}{e}$ holds and as $\kappa\to 0$, the approximation ratio tends to 1. More recently, \citet{sviridenko2017optimal} proposed two approximation algorithms for the more general problem of monotone non-decreasing submodular maximization subject to a matroid constraint and obtained a $1-\frac{\kappa}{e}$ approximation ratio for these two algorithms. They also provided matching upper bounds for this problem showing that the $1-\frac{\kappa}{e}$ approximation ratio is indeed optimal. 
Later on, the notion of curvature was extended to continuous submodular functions as well and similar bounds were derived for the maximization problem \citep{sadeghi2021differentially,sadeghi2020longterm,  sadeghi2021fast,sessa2019bounding}.


\paragraph{BP maximization.}
\citet{jeff_bp} first introduced the problem of maximizing a BP function $h = f + g$ (Definition~\ref{def:bp}) subject to a cardinality constraint
as well as the intersection-of-$p$-matroids constraint. They showed that this problem is NP-hard to approximate to any factor without further assumptions. However, if the supermodular function $g$ has a bounded curvature (i.e., $\kappa^g<1$), it is possible to obtain approximation ratios for this problem. In particular, for the setting with a cardinality constraint, they analyzed the greedy algorithm along with a new algorithm (SemiGrad) and provided a $\frac{1}{\kappa_f}\big(1-e^{-(1-\kappa^g)\kappa_f}\big)$ approximation ratio. Note that for general supermodular functions ($\kappa^g=1$), the approximation ratio is 0 and as $\kappa^g\to 0$, the bound tends to that of \citet{conforti1984submodular} for monotone non-decreasing submodular maximization subject to a cardinality constraint. \citet{jeff_bp} also showed that not all monotone non-decreasing set functions admit a BP decomposition. However, in cases where such a decomposition is available, one can compute the curvature of submodular and supermodular terms in linear time and compute the bound. More recently, \citet{distorted_greedy} proposed a distorted version of the greedy algorithm for this problem and provided an improved $\min\{1-\frac{\kappa_f}{e},1-\kappa^g e^{-(1-\kappa^g)}\}$ approximation ratio.

\paragraph{Submodularity ratio.}
\citet{das2011submodular} introduced the notions of submodularity ratio
$\gamma$ and generalized curvature $\zeta$ for general monotone non-decreasing set functions (defined in Section~\ref{sec:backgr-subm-superm}) and showed that the greedy algorithm obtains the approximation ratio $\frac{1}{\zeta}\big(1-e^{-\zeta \gamma}\big)$ under cardinality constraints \citep{bian2019guarantees, das2011submodular}. Unlike the BP decomposition, the notions of submodularity ratio and generalized curvature can be defined for any monotone non-decreasing set function but are, in general, exponentially costly to compute.

\paragraph{Adaptive and Interactive Submodularity.}
\citet{NIPS2017_krause} is the work most related to ours---they
employ a similar UCB algorithm to optimize an unknown submodular
function in an interactive setting. They define regret as the
sub-optimality gap with respect to a full-knowledge greedy strategy at
the final round. They define a different notion of pointwise regret as
the difference between the algorithm's rewards and that of the greedy
strategy \emph{at that stage}, treating the past choices as fixed. By
viewing the submodular problem as a special case of contextual
bandits, they observe that this accumulation of pointwise regret is
precisely bounded by~\cite{nips2011_krause}. Then, they modify the
seminal proof in~\citet{nemhauser1978analysis} to relate their target
notion of regret with the pointwise
regrets. \citet{guillory2010-icml-interactive-sub,krause_adaptive_submodular} also consider different adaptive or interactive
submodular problems. They both assume more knowledge of the structure of the
submodular objective than~\cite{NIPS2017_krause}.

\track{\paragraph{Kernel bandits.}
\citet{icml2010_srinivas} consider the problem of optimizing an unknown function $f$ that is either sampled from a Gaussian process or has bounded RKHS norm.
They develop an upper-confidence bound approach, called GP-UCB, that achieves sublinear regret with respect to the optimal, which depends linearly on an information-gain term $\gamma_\Tvar$. \citet{nips2011_krause} extend the setting from \citet{icml2010_srinivas} to the contextual setting where the function $f_{z_t}$ being optimized now depends also on a context $z_t$ that varies with time.
\citet{Valko2013FiniteTimeAO}
replace the $\gamma_\Tvar$ scaling with $\sqrt{\gamma_\Tvar}$. \citet{camilleri21a} extend experimental design for linear bandits to the kernel bandit setting, and provide a new analysis, which also incorporates batches. \citet{zenati22a} use \Nystrom{} points to speed up the algorithm, with the same asymptotic regret guarantee as GP-UCB, which inspires the algorithms developed in this paper.
}

\track{\paragraph{Combinatorial Bandits} In \citep{takemori20a, caramanis, kveton, chen_context_combinatorial}, the optimizer chooses a set at each time step and the submodularity is between these elements chosen in the single time step. In our work, the optimizer chooses a single item at each time and accumulates a set over time; the submodularity is between these elements chosen at different time steps. While the formulations are similar, the formulation in our paper would apply to different applications than this body of work.}


\paragraph{Comparison with \citep{liu2021online, chen2020online}}

These papers have titles similar to the present work, but actually
apply to a very different setting.  Consider Algorithm~1
from~\citep{liu2021online}, which describes the setting they work
within. This algorithm is reproduced below for ease of reference.
\begin{algorithm}[h]
  \caption{\citep{liu2021online}: Greedy for ``online'' (streaming) BP maximization}
  \begin{algorithmic}[1]
    \STATE $S_0 \gets \emptyset$
    \FOR{each element $u_\tvar$ revealed}
        \IF {$\tvar < k$}
         \STATE $S_\tvar \gets S_{\tvar-1} + u_\tvar$
       \ELSE
         \STATE Let $u'_j$ be the element of $S_{\tvar-1}$ maximizing $h(S_{\tvar-1} + u_\tvar - u'_j)$
         \IF{$h(S_{\tvar-1} + u_\tvar - u'_j) - h(S_{\tvar-1}) > \frac{c(h(S_{\tvar-1}))}{k(1-\epsilon)}$}
             \STATE $S_\tvar \gets S_{\tvar-1} + u_\tvar - u'_j$
         \ELSE
             \STATE $S_\tvar \gets S_{\tvar-1}$
         \ENDIF
       \ENDIF
    \ENDFOR
  \end{algorithmic}
\end{algorithm}
We remark that the setting is better described as ``streaming'' rather
than ``online'' since it is consistent with a number of \textbf{streaming}
submodular maximization
algorithms~\citep{badanidiyuru2014streaming,chekuri2015streaming,feldman2018less}.
The approach in \citep{liu2021online} assumes that the function $h$ is
known, with arbitrary queries available, and there is no cost for
evaluating it with different sets as input.  However, the items are
revealed one by one, using a fixed order, and the algorithm must
decide whether to add the item to the set or to forever forget it.
Hence, there is no statistical estimation component to their setting,
and they provide competitive ratio bounds rather than regret bounds as
in our present work.



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Applications and Role of MNN functions}
\label{sec:app_applications}

In this section, we present two examples that illustrate the modeling
power of BP functions for different applications and compare this
approach with common approaches from the literature.

\subsection{Active Learning}
\label{sec:active-learning-1}

\begin{figure}[t]
     \centering
     \subfigure[Original training set]{ % Add caption for the first subfigure
         \includegraphics[width=0.31\columnwidth]{figs/original_data_active_learn.pdf}
     }
     \subfigure[Submodular objective]{ % Add caption for the second subfigure
         \includegraphics[width=0.31\columnwidth]{figs/submod_active_learn.pdf}
     }
     \subfigure[BP objective]{ % Add caption for the third subfigure
         \includegraphics[width=0.31\columnwidth]{figs/bp_active_learn.pdf}
     }
\caption{Greedy algorithm selection on submodular (second panel) and BP (third panel) objectives for subset selection of $100$ points of training data from a ground set of $400$ points. The first panel depicts the entire training (ground) set. The details are provided in Section~\ref{sec:app_numerical}.}
\label{fig:active_learn}
\end{figure}

From Figure~\ref{fig:active_learn}, we see that the BP function (third panel) results in the selection of complementary points near the decision boundary---i.e., points of opposite class that are proximal. It is \emph{impossible} to choose a submodular function that encourages this type of desirable cooperative behavior due to the diminishing-returns property.


\paragraph{Comparison with approaches for Pool-based Active Learning}

In their survey paper, \cite{settles} compare submodularity-based approaches with other approaches for active learning. The main benefit of framing the active learning problem as submodular is that the greedy algorithm can be employed, which is much less computationally expensive than other common active learning approaches. While submodularity has been shown to be relevant to active learning  \citep{guestrin_sensor_placement, wei_active_learning, hoi}, \cite{settles} remark that in general, the active learning problem cannot be framed as submodular.

In our paper, by extending the classes of functions that can be optimized online, we take a step towards addressing this limitation of submodularity. Further, an open question outlined in \cite{settles} is that of multi-task active learning, which has not been explored extensively in previous work. However, our formulation in Vignette~\ref{vignette:active_learning} naturally extends to this multi-task setting.

\subsection{Recommendation Systems}
\label{sec:recomm-syst-1}


In Table~\ref{tab:movie_illustrate}, the BP function enables the desirable selection of movies from the same series in the correct order.
As above, it is impossible to design a submodular utility function that encourages this type of behavior.


\paragraph{Comparison with approaches for online recommendation}

For recommender systems, the dependencies between past and future recommendations may be modeled through a 
changing ``state variable,'' leading to adopting reinforcement learning (RL) solutions \citep{rl_reco_survey};
these can be framed as multi-task RL problems \citep{bose2023initializing, bose2024offline}. 
These have been tremendously effective at maximizing engagement; however, \cite{kleinberg} highlight that a key oversight of these approaches is that the click and scroll-time data that platforms observe is not representative of the users' actual utilities: ``research has demonstrated that we often make choices in the moment that are inconsistent with what we actually want.''
Hence, \citeauthor{kleinberg} advocate encoding diminishing returns of addictive but superficial 
content into the model, in the manner that we do with submodular functions. 
Further, RL systems are incentivized to manipulate users' behavior \citep{wilhelm, Hohnhold2015FocusingOT}, mood \citep{kramer} and preferences \citep{epstein_robertson}; this inspires the use of principled mathematical techniques, as in the present work, to design systems to behave as we want rather than simply following the trail of the unreliable observed data.

\begin{table}[t]
    \centering
\begin{tabular}{|l|l|l|}
\hline
{} &                             SM Objective &                                   BP Objective \\
\hline
0 &                          Lion King, The  &                                Godfather, The  \\
1 &                                   Speed  &                       Godfather: Part II, The  \\
2 &                          Godfather, The  &                      Godfather: Part III, The  \\
3 &                 Godfather: Part II, The  &     Star Wars: Episode I \\
4 &                         Terminator, The  &                                       Memento  \\
5 &                       Good Will Hunting  &        Harry Potter I  \\
6 &                                 Memento  &  Star Wars: Episode II\\
7 &  Harry Potter I   &       Harry Potter: II \\
8 &                        Dark Knight, The  &  Star Wars: Episode III \\
9 &                               Inception  &                              Dark Knight, The  \\ \hline
\end{tabular}
\caption{Comparison of the selections of the greedy algorithm on submodular and BP objectives for movie recommendation, on a toy ground set of $23$ movies from the MovieLens dataset. The submodular objective is the facility location objective, chosen from \cite{NIPS2017_krause}. In the BP objective, there is an additional reward at each step for choosing a movie that is complementary with previously selected movies; this results in the desirable joint \textbf{selection of groups of movies from the same series}. The task is formalized mathematically in Section~\ref{sec:numerical}, and experimental details are provided in the supplement.}
\label{tab:movie_illustrate}
\end{table}

\begin{table}[tbh]
    \centering
\begin{tabular}{|l|l|}
\toprule
                     Lion King, The  &                             Good Will Hunting  \\
                              Speed  &                      Godfather: Part III, The  \\
                         True Lies  &     Star Wars: Episode I - The Phantom Menace  \\
                           Aladdin  &                                     Gladiator  \\
                  Dances with Wolves  &                                       Memento  \\
                             Batman  &                                         Shrek  \\
                     Godfather, The  &        Harry Potter I: The Sorcerer's Stone   \\
            Godfather: Part II, The  &  Star Wars: Episode II - Attack of the Clones  \\
                    Terminator, The  &       Harry Potter II: The Chamber of Secrets  \\
 Indiana Jones and the Last Crusade  &  Star Wars: Episode III - Revenge of the Sith  \\
                       Men in Black   &                              Dark Knight, The  \\
                                     &                                     Inception  \\
\bottomrule
\end{tabular}
\caption{Ground set for Table~\ref{tab:movie_illustrate}
\label{tab:movie_illustrate_ground_set}
\vspace{0.5em}}
\end{table}



\newpage

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{A simple approach to guarantee low regret: Why it is too weak}
\label{sec:app_naive}


In this section, we provide an alternate proof for the approximation ratio that the greedy algorithm obtains on a BP function in the offline setting. The robustness of this proof can be very simply studied, in a manner similar to \cite{NIPS2017_krause}. However, the approximation ratio obtained is worse than that of \cite{jeff_bp}; hence, the regret guarantee in the online setting would be provided against a weak baseline. This motivates why we revisit the proof from \cite{jeff_bp}. Just for this section, we use simpler notation $h$ for the BP function and $k$ for the cardinality constraint, since we are presenting the argument for the offline setting.

\begin{proposition}
For a BP maximization problem subject to a cardinality constraint, $\max_{S:|S|\leq k}h(S)$ where $h(S)=f(S)+g(S)$, the greedy algorithm obtains the following guarantee:
\begin{equation*}
    h(S)\geq (1-e^{-(1-\kappa^g)})h(S^*),
\end{equation*}
where $S^*=\{v_1^*,\dots,v_k^*\}=\text{arg}\max_{S:|S|\leq k}h(S)$ and $\kappa^g$ is the curvature of the supermodular function $g$.
\end{proposition}
\begin{proof}
For $\tvar<k$, let $S_\tvar=\{v_1,\dots,v_\tvar\}$ be the items chosen by the greedy algorithm. We can write:
\begin{align*}
    h(S^*)&\leq h(S^*\cup S_\tvar)\\
    &=h(S_\tvar)+\sum_{j=1}^kh(v_j^*|S_\tvar\cup \{v_1^*,\dots,v_{j-1}^*\})\\
    &\leq h(S_\tvar)+\frac{1}{1-\kappa^g}\sum_{j=1}^kh(v_j^*|S_\tvar)\\
    &\leq h(S_\tvar)+\frac{1}{1-\kappa^g}\sum_{j=1}^kh(v_{\tvar+1}|S_\tvar)\\
    &= h(S_\tvar)+\frac{k}{1-\kappa^g}\big(h(S_{\tvar+1})-h(S_\tvar)\big),
\end{align*}
where the first inequality follows from the monotonicity of $h$, the second inequality uses Lemma C.1.(ii) of \cite{jeff_bp}, and the third inequality is due to the update rule of the greedy algorithm. Rearranging the terms, we can write:
\begin{align*}
    h(S^*)-h(S_\tvar)&\leq \frac{k}{1-\kappa^g}\big([h(S^*)-h(S_{\tvar})]-[h(S^*)-h(S_{\tvar+1})]\big)\\
    h(S^*)-h(S_{\tvar+1})&\leq (1-\frac{1-\kappa^g}{k})(h(S^*)-h(S_{\tvar}))
\end{align*}
Applying the above inequality recursively for $\tvar=0,\dots,k-1$, we have:
\begin{equation*}
    h(S^*)-h(S)\leq (1-\frac{1-\kappa^g}{k})^k (h(S^*)-\underbrace{h(\emptyset)}_{=0})
\end{equation*}
Using the inequality $1-x\leq e^{-x}$ and rearranging the terms, we have:
\begin{equation*}
    h(S)\geq (1-e^{-(1-\kappa^g)})h(S^*)
\end{equation*}
If $\kappa_f=1$, this approximation ratio matches the obtained approximation ratio for the greedy algorithm in Theorem 3.7 of \cite{jeff_bp} without the need to change the original proof of the greedy algorithm.
\end{proof}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proofs from Section~\ref{sec:robustness}}
\label{sec:proofs-sect-refs}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Approximate Greedy on BP Functions}
\label{sec:appr-greedy-bp}

\paragraph{Notation}

We use $S_{\tvar}$ to refer to the ordered set of elements chosen for function $h$ until round $\tvar$, and $S$ to refer to the ordered final set of items chosen for function $h$ until round $\Tvar$. Hence, $S_j$ refers to the first $j$ elements chosen for $h$. Let $s_j$ be the $j^\text{th}$ element of $S$. Then, we define $a_j = h(s_j | \{s_1 \ldots s_{j-1}\})$ to be the gain of the $j^\text{th}$ element chosen.

Recall that $S$ is an ordered set. We let $C \subseteq [k]$ denote the indices (in increasing order) of elements in $S$ that are also in $S^\ast$.
For instance, for $S = \{s_1 \ldots s_5\}$ and $S \cap S^\ast = \{s_1, s_2, s_3\}$, we have $C = \{1, 2, 3\}$. Hence, $j \in C \iff s_j \in S \cap S^\ast$. Further, define the filtered sets $C_t = \{c \in C| c \leq t\}$, i.e., the indices of those among the first $t$ elements of $S$ that are also in the optimal set $S^\ast$.

We restate the lemma for ease of reference.
\lemmaone*

\begin{proof}[Proof of Lemma~\ref{prop:robust_greedy_bp}]

From Lemma~\ref{lem:mistake_bound}, we have that the approximate greedy procedure obeys $k$ different inequalities, and we wish to show that these suffice to establish the inequality above. To complete the argument, we consider the worst-case overall gain when these $k$ inequalities are satisfied, and show that this worst-case sequence satisfies the desired inequality; hence, the approximate greedy procedure must satisfy it as well.

To characterize the worst-case gains, we define a set of linear programming problems parameterized by a set $B$ and constants $(\xi, \rho)$.
\begin{equation}
\label{eq:lp}
    \begin{aligned}
    & T(B, \xi, \rho) = \min_b \sum_{j=1}^{k} b_j \\
    & \text{s.t.} \;\;\;  h(S^\ast) \leq \xi \sum_{j \in [t-1] \setminus B_{t-1}} b_j + \sum_{j \in B_{t-1}} b_j  + \frac{k - |B_{t-1}| }{1 - \rho} b_t \; \; \; ,\forall t \in [k]
    \end{aligned}
\end{equation}
In the above, the decision variable $b = [b_1 \ldots b_{k}]$ is a vector in $\R^{k}$, and satisfies $b \geq 0$.
The constant $k$ is a fixed value for the LP.
The parameter of the LP is a set $B \subseteq [k]$, and $B_t = \{j \in B| j \leq t\}$ is the corresponding filtered set. Note that the constraints are linear in $b$ with non-negative coefficients.

The above LP becomes helpful to our setting when we set $(\xi, \rho) = (\kappa_f, \kappa^g)$. Additionally, we are interested in the choices $B = C$ and $B = \emptyset$, where $C$ is defined prior to the lemma statement. To show the result, we hope to show the following chain of inequalities:
\begin{equation}
   h(S) + \sum_{j=1}^{k}  r_j \geq T(C, \kappa_f, \kappa^g) \geq T(\emptyset, \kappa_f, \kappa^g) \geq \omega h(S^\ast)
\end{equation}
In the above,
\[
\omega = \frac{1}{\kappaf}\left[1 - e^{-(1 - \kappa^g) \kappa_{f}}\right]
\]
Combining the two ends of this chain yields the desired lemma statement. 
We recognize that $T(\cdot)$ is exactly the LP considered in \cite{jeff_bp}, 
modulo notation differences. 
Since the second and third inequalities are just statements about the linear program, 
they follow directly from Lemma D.2 in \cite{jeff_bp} 
when we substitute $\xi = \kappa_f$ and $\rho = \kappa^g$.

For the first inequality, we have from Lemma~\ref{lem:mistake_bound} that $b_j = a_j + r_j$ is a feasible solution for the linear program $T(C,  \kappa_f, \kappa^g)$. Hence,
\[
T(C, \kappaf, \kappag) \leq \sum_{j=1}^{k} b_j =  \sum_{j=1}^{k} a_j + \sum_{j=1}^{k} r_j = h(S) + \sum_{j=1}^{k} r_j
\]
\end{proof}

\vspace{1em}

The lemma below is a modified version of Equation~(19) in \cite{jeff_bp}, which accounts for the deviation of our algorithm from the greedy policy.

\begin{lemma}
\label{lem:mistake_bound}
Using the notation above and for $S$ as chosen by the approximate greedy procedure, it follows that $\forall t \in [k]$,
\[
h(S^\ast) \leq \kappa_f \sum_{j \in [t-1] \setminus C_{t-1}} (a_j + r_j) + \sum_{j \in C_{t-1}} (a_j + r_j)  + \frac{k - |C_{t-1}| }{1 - \kappa^g} \left(a_t + r_{t} \right)
\]

\end{lemma}
\begin{proof}[Proof of Lemma~\ref{lem:mistake_bound}]
By the properties of BP functions from Lemma C.2 in \cite{jeff_bp}, it follows for all $t \in [k]$ that
\begin{align}
    h(S^\ast) & \leq \kappaf \sum_{j \in [t-1] \setminus C_{t-1}} a_j + \sum_{j \in C_{t-1}} a_j + h (S^\ast \setminus S_{t-1} | S_{t-1})\\
    & \leq \kappaf \sum_{j \in [t-1] \setminus C_{t-1}} (a_j + r_j) + \sum_{j \in C_{t-1}} (a_j + r_j) + h (S^\ast \setminus S_{t-1} | S_{t-1})
\end{align}
The second inequality follows because the coefficients on the first two summations are non-negative and $r_j \geq 0$. Now, we must simplify the third term to obtain the desired bound. For any feasible $v$,
\begin{equation}
\label{eq:intro_ri}
    h(v | S_{t-1}) \leq \sup_v h(v|S_{t-1}) \leq h(s_{t} | S_{t-1}) + r_{t} .
\end{equation}

The first inequality follows from the definition of $\sup$ and the second follows from the definition of $r_\tvar$ in the proof of Theorem~\ref{thm:bp_regret_bound} above.  Now, apply inequality (iv) from Lemma C.1 in \cite{jeff_bp}:
\begin{align*}
    h (S^\ast \setminus S_{t-1} | S_{t-1}) & \leq \frac{1}{1 - \kappa^g} \sum_{v \in S^\ast \setminus S_{t-1}} h(v|S_{t-1}) \\ 
    & \leq \frac{1}{1 - \kappa^g} \sum_{v \in S^\ast \setminus S_{t-1}} \left[ h(s_{t} | S_{t-1}) + r_{t} \right]
\end{align*}
The second line follows from Equation~\eqref{eq:intro_ri}.

We have that \[
|S^\ast \setminus S_{t-1}| = |S^\ast| - |S^\ast \cap S_{t-1}| = k - |S^\ast \cap S_{t-1}|.
\]
Hence,
\begin{align*}
    h (S^\ast \setminus S_{t-1} | S_{t-1}) & \leq \frac{k - |S^\ast \cap S_{t-1}|}{1 - \kappa^g} \left[h(s_{t} | S_{t-1}) + r_{t} \right]
\end{align*}
Recognizing that  $|S^\ast \cap S_{t-1}| = |C_{t-1}|$ completes the argument.
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Approximate Greedy on WS Functions}
\label{sec:appr-greedy-ws}

Define $S, s_j, a_j, C$ as in the proof for BP functions. Below, we present the counterparts of the lemmas in the proof of the BP functions for the present case. The proof for Lemma~\ref{lem:sr_mistake_bound} is different than Lemma~\ref{lem:mistake_bound} due to the change in the class of functions being considered. The similarity of the two proofs suggests the generality of our proof technique and indicates that it may be analogously applied to other classes of functions as well. We restate the Lemma for ease of reference.

\lemmatwo*

\begin{proof}[Proof of Lemma~\ref{lem:robust_greedy_sr}]
We consider again the parameterized LP $T(\cdot)$, but this time with the constants set as $\xi = \zeta, \rho = 1 - \gamma$.
To show the result, we hope to show the following chain of inequalities:
\begin{equation}
   h(S) + \sum_{j=1}^{k}  r_j \geq T(C, \zeta, 1 - \gamma) \geq T(\emptyset, \zeta, 1 - \gamma) \geq \omega h(S^\ast)
\end{equation}
In the above,
\[
\omega = \frac{1}{\zeta} \left[ 1 - \left(1 - \frac{\gamma \zeta}{k} \right)^{k} \right]
\]

Similarly to the argument in the proof of Lemma~\ref{prop:robust_greedy_bp}, 
the last two inequalities follow directly from 
Lemma D.2 in \cite{jeff_bp} when we substitute 
$\xi = \zeta$ and $\rho = 1 - \gamma$. 
Under the same choice of constants, 
we have from Lemma~\ref{lem:sr_mistake_bound} 
that $b_j = a_j + r_j$ is a feasible solution for the linear program 
$T(C, \zeta, 1 - \gamma)$. Hence,
\[
  T(C, \zeta, 1 - \gamma) \leq \sum_{j=1}^{k} b_j =  \sum_{j=1}^{k} a_j + \sum_{j=1}^{k} r_j = h(S) + \sum_{j=1}^{k} r_j
\]
Recognizing that
\[
\frac{1}{\zeta} \left[ 1 - \left(1 - \frac{\gamma \zeta}{k} \right)^{k} \right] \geq \frac{1}{\zeta} \left[ 1 - e^{-\zeta \gamma} \right]
\]
completes the argument.
\end{proof}

The lemma below is a modified version of Lemma~1 in \cite{bian2019guarantees}.

\begin{lemma}
\label{lem:sr_mistake_bound}
Using the notation above and for $S$ as chosen by the approximate greedy procedure, it follows that $\forall t \in \{0 \ldots k - 1\}$,
\[
h(S^\ast) \leq \zeta \sum_{j \in [t] \setminus C_t} (a_j + r_j) + \sum_{j \in C_t} (a_j + r_j)  + \frac{1}{\gamma} (k - |C_t|) \left(a_{t+1} + r_{t+1} \right)
\]

\end{lemma}
\begin{proof}[Proof of Lemma~\ref{lem:sr_mistake_bound}]

The proof follows from the definitions of generalized curvature, submodularity ratio, and instantaneous regret $r_\tvar$.

\begin{align*}
    h(S^\ast \cup S_t) & = h(S^\ast) + \sum_{j \in [t]} h(s_{j}| S^\ast \cup S_{j-1})
\end{align*}

We can split the summation above to separately consider the elements from $S_t$ that do and do not overlap with $S^\ast$.
\begin{align}
   h(S^\ast \cup S_t)  & = h(S^\ast) + \sum_{j: s_j \in S_t \setminus S^\ast} h(s_{j}| S^\ast \cup S_{j-1}) + \underbrace{\sum_{j: s_j \in S_t \cap S^\ast} h(s_{j}| S^\ast \cup S_{j-1})}_{=0} \notag \\
    & = h(S^\ast) + \sum_{j: s_j \in S_t \setminus S^\ast} h(s_{j}| S^\ast \cup S_{j-1}) \label{eq:union_utility}
\end{align}

From the definition of submodularity ratio,
\begin{equation}
\label{eq:apply_submod_ratio}
    h(S^\ast \cup S_t) \leq h(S_t) + \frac{1}{\gamma} \sum_{\omega \in S^\ast \setminus S_t} h(\omega | S_t)
\end{equation}

From the definition of generalized curvature, it follows that
\begin{align}
\sum_{j: s_j \in S_t \setminus S^\ast} h(s_{j}| S^\ast \cup S_{j-1}) & \geq (1 - \zeta) \sum_{j: s_j \in S_t \setminus S^\ast} h(s_{j}| S_{j-1}) \notag \\
& = (1 - \zeta) \sum_{j: s_j \in S_t \setminus S^\ast} a_{j} \label{eq:apply_curvature}
\end{align}

Then, plugging the inequalities \eqref{eq:apply_submod_ratio} and \eqref{eq:apply_curvature} into \eqref{eq:union_utility},
\begin{align}
    h(S^\ast) & = h(S^\ast \cup S_t) - \sum_{j: s_j \in S_t \setminus S^\ast} h(s_{j}| S^\ast \cup S_{j-1}) \notag \\
    &\leq \left[h(S_t) + \frac{1}{\gamma} \sum_{\omega \in S^\ast \setminus S_t} h(\omega | S_t)\right] +  \left[\zeta \sum_{j: s_j \in S_t \setminus S^\ast} a_{j} -  \sum_{j: s_j \in S_t \setminus S^\ast} a_{j} \right] \label{eq:sr_box}
\end{align}

Now, since $h(S_t) = \sum_{j \in [t]} a_j$, we can rearrange and write \[
h(S_t) - \sum_{j: s_j \in S_t \setminus S^\ast} a_{j} = \sum_{j: s_j \in S_t \cap S^\ast} a_{j}
\] to simplify Equation~\eqref{eq:sr_box} as
\begin{align}
        h(S^\ast) & \leq \zeta \sum_{j: s_j \in S_t \setminus S^\ast} a_{j} + \frac{1}{\gamma} \sum_{\omega \in S^\ast \setminus S_t} h(\omega | S_t) + \sum_{j: s_j \in S_t \cap S^\ast} a_{j} \notag \\
    & \leq  \zeta \sum_{j: s_j \in S_t \setminus S^\ast} a_{j} + \frac{1}{\gamma} \sum_{\omega \in S^\ast \setminus S_t} (a_{t+1} + r_{t+1}) + \sum_{j: s_j \in S_t \cap S^\ast} a_{j} \label{eq:sr_rt_sup} \\
    & \leq \zeta \sum_{j: s_j \in S_t \setminus S^\ast} (a_{j} + r_j) + \sum_{j: s_j \in S_t \cap S^\ast} (a_{j} + r_j) + \frac{1}{\gamma} (k - |C_\tvar|) (a_{t+1} + r_{t+1}) \label{eq:sr_rt_pos}
\end{align}

Equation~\eqref{eq:sr_rt_sup} follows by using the definitions of $r_{t+1}$ and supremum, and Equation~\eqref{eq:sr_rt_pos} follows since $r_j \geq 0$ for all $j$ and $|S^\ast \setminus S_t| = k - |C_\tvar|$.
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Approximate Weighted Greedy on BP Functions}
\label{sec:appr-weight-greedy}

We recap notation for ease of reference. Define the modular lower bound of the submodular function $l_{1}(S) = \sum_{j \in S} f(j| V \backslash \{j\})$. Additionally, define the totally normalized submodular function as $\fone(S) = f(S) - l_{1}(S)$. Note that $\fone$ will always have curvature $\kappa_f = 1$.
With this notation, we can write $h (S) = f_{1}(S) + g (S) + l_{1}(S)$. Now, define the function
\begin{equation}
    \pi_{j} (v | A) = \left(1 - \frac{1}{k}\right)^{k - j - 1} f_{1} (v|A) + g(v|A) + l_{1}(v)
\end{equation}
Also define
\begin{equation}
    \pi_{j} (A) = \left(1 - \frac{1}{k}\right)^{k - j} f_1(A) + g(A) + l_1(A)
\end{equation}

The proof of Lemma 7 follows using the same outline as the approximation-guarantee of the distorted greedy algorithm for BP functions (Theorem 3 in \cite{distorted_greedy}). However, Algorithm~\ref{alg:mnnucb_dist} is not greedy, so we keep record of the deviation of Algorithm~\ref{alg:mnnucb_dist} from the best-in-hindsight choice at \emph{each stage}. This results in the new second term in Lemma~\ref{prop:robust_distorted_greedy}.

\lemmathree*

\begin{proof}
    Using the submodular and supermodular curvature definition, we can write:
\begin{align*}
    &l_1(S) = \sum_{j \in S} f (j| V \backslash \{j\})\geq (1-\kappa_f)f(S)\\
    &l_2(S) = \sum_{j \in S} g (j|\emptyset)\geq (1-\kappa^g)g(S)
\end{align*}
Define $l = l_1 + l_2$. Then, we can use the result of Lemma~\ref{lem:distorted_rj_lemma} to write:
\begin{align*}
    f(S) + g(S) &= f_1(S) + g_1(S) + l(S) \\
    &\geq \left(1 - \frac{1}{e} \right) f_1 (S^\ast) + l (S^\ast) - \rjsum \\
    &= \left(1 - \frac{1}{e} \right) (f (S^\ast)-l_1(S^\ast)) + l_1 (S^\ast) + l_2(S^\ast) - \rjsum \\
    &= \left(1 - \frac{1}{e} \right) f (S^\ast) + \frac{1}{e} l_1 (S^\ast) + l_2(S^\ast) - \rjsum \\ 
    &\geq \left(1 - \frac{1}{e} \right) f (S^\ast) + \frac{1-\kappa_f}{e} f(S^\ast) + (1-\kappa^g)g(S^\ast) - \rjsum \\ 
    &= \left(1 - \frac{\kappa_f}{e} \right) f (S^\ast) + (1-\kappa^g)g(S^\ast) - \rjsum \\ 
    &\geq \min \left\{ 1 - \frac{\kappa_f}{e}, \; 1 - \kappa^g \right\} \; h_q (S^\ast) - \rjsum.
\end{align*}
\end{proof}


\vspace{1em}

\begin{lemma}
\label{lem:dist_gain_bound}
\[
\pi_{j} (s_j| S_{j-1}) +  r_j \geq \frac{1}{k} \left(1 - \frac{1}{k} \right)^{k - (j +1)} \left(f_1(S^\ast) - f_1(S_{j-1})\right) + \frac{1}{k} l(S^\ast)
\]
\end{lemma}
\begin{proof}[Proof of Lemma~\ref{lem:dist_gain_bound}]

From the definition of $r_j$,

\begin{align*}
  \pi_{j} (s_j| S_{j-1}) + r_j & \geq \frac{1}{k} \sum_{e \in S^\ast} \pi_j (e|S_{j-1}) \\
  & = \frac{1}{k} \sum_{e \in S^\ast} \left[ \left(1 - \frac{1}{k} \right)^{k - (j +1)} f_1 (e| S_{j-1}) + g_1 (e| S_{j-1}) + l (e) \right] \\ 
  & \geq \frac{1}{k} \left(1 - \frac{1}{k} \right)^{k - (j +1)} \left(f_1(S^\ast) - f_1(S_{j-1}) \right) + \frac{1}{k} l (S^\ast)
\end{align*}

The inequality follows from the submodularity of $f_1$ and the supermodular curvature of $g_1$.

\end{proof}


\begin{lemma}
\label{lem:distorted_rj_lemma}
Any approximately weighted greedy procedure with constants $\{r_j\}_{j=1}^k$ returns a set $S$ of size $k$ such that
\[
f_1(S) + g_1(S) + l(S) + \rjsum \geq \left(1 - \frac{1}{e} \right) f_1 (S^\ast) + l (S^\ast)
\]
\end{lemma}
\begin{proof}
According to the definition of $\pi$, we have that $\pi_{0} (\emptyset) = 0$ and
\[
\pi_{k} (S) =  f_1(S) + g_1 (S) + l (S)
\]

Applying Lemma~4 from \cite{distorted_greedy}, we have

\begin{align*}
    &\pi_{j+1} (S_{j+1}) - \pi_j (S_j) \\
    & = \pi_j (s_{j+1}| S_j) + \frac{1}{k} \left(1 - \frac{1}{k} \right)^{k - (j +1)} f_1 (S_j) \\ 
    & \geq \frac{1}{k} \left(1 - \frac{1}{k} \right)^{k - (j +1)} f_1(S^\ast) + \frac{1}{k} l (S^\ast) - r_{j+1}
\end{align*}

Above, we applied Lemma~\ref{lem:dist_gain_bound} to obtain the inequality. Now, we have that
\begin{align*}
    & f_1 (S) + g_1 (S) + l (S) = \sum_{j=0}^{k-1} \left[ \pi_{j+1} (S_{j+1}) - \pi_j (S_j) \right] \\
    & \geq \sum_{j=0}^{k-1} \left[ \frac{1}{k} \left(1 - \frac{1}{k} \right)^{k - (j +1)} f_1(S^\ast) + \frac{1}{k} l (S^\ast) - r_{j+1} \right] \\
    & \geq \left (1 - \frac{1}{e}\right)f_1(S^\ast) + l(S^\ast) - \rjsum
\end{align*}

\end{proof}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Discussion and Proofs from Section~\ref{sec:alpha_regret}}
\label{sec:app_smucb}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Remarks on hyperparameters \texorpdfstring{$(\eta, b)$}{eta, b}}
\label{sec:remark-hyperparameters}

Note that $b$ refers to our budget fraction variable as it serves to limit the final size of $G_t$, while $\eta$ is an accuracy-computation tradeoff variable that tends to produce larger $G_t$'s. While $\eta$ and $b$ are somewhat related (and are partially redundant), we utilize the ``budget'' and ``accuracy'' notions as originally defined in \cite{zenati22a} to be consistent with that work.

\subsection{Remarks on step size \texorpdfstring{$\beta_\tvar$}{beta t}}
\label{sec:remark-steps-beta_t}


Based on the analysis found in~\citet{zenati22a},
we set
\begin{equation}
\label{eq:beta}
\beta_\tvar = \sqrt{\lambda}B + \sqrt{4 \log(\Tvar) + \log\left(e + \frac{e \tvar}{\lambda}\right) \deff}
\end{equation}
which enables our regret bounds to hold where $e = \exp(1)$, $\lambda$ is a hyperparameter,
and $B$ is our RKHS norm bound.

In our empirical simulations, however, we found it much more effective
to set $\beta_\tvar$ to a constant which is then tuned as a
hyperparameter. In fact, \citet{zenati22a} found this to be the case
in their simulations as well.

\subsection{Remark on role of kernel parameters on \texorpdfstring{$\deff$}{deff}}
\label{sec:kernel_deff}

Consider the RBF kernel $\kernel(x, x') = \exp(-b \|x - x'\|^2)$.
If the parameter $b$ is very large, then the kernel function will be very close to zero for all $x \neq x'$.
Hence, the kernel matrix  $K_T$ will be close to the identity matrix,
and the eigenvalues will decay very slowly.
Hence the effective dimension $\deff$ is likely to be large.
Our current regret bound does not capture this, because we wanted to focus on the scaling of regret with $T$.
However, there is a constant in front that scales as $b$,
which effectively changes the base of the $\log(T)$ in the regret bound \citep[Section~4.B]{seeger2008information}.
In Figure~\ref{fig:deff_b}, we see that if the horizon $T$ is quite small, this effect can dominate and make
the $T$-scaling appear almost linear.
On the other hand, if we make $b$ very small, then the quantity $B$ would increase;
this is because $\kernel(x, x')$ being large is not very informative about the function values at $x$ and $x'$.
Hence, some care is required to tune the kernel parameters correctly.
This applies to other kernel functions as well.
This effect is present in prior works \citep{icml2010_srinivas, nips2011_krause, zenati22a} as well,
but these do not address it explicitly, which is why we wanted to offer some clarity about this point.

\begin{figure}[t]
  \centering
  \includegraphics[width=0.7\textwidth]{figs/deff_constants.pdf}
  \caption{The dependence of the effective dimension $\deff$ on the parameter $b$ in the RBF kernel.}
  \label{fig:deff_b}
\end{figure}

\subsection{Proof of Theorem~\ref{thm:bp_regret_bound}(b)}
\label{sec:proof-theor-refthm:b-1}

The proof follows the same LP construction of \cite{conforti1984submodular} as Theorem~\ref{thm:bp_regret_bound}. The main contribution lies in showing Lemma~\ref{lem:sr_mistake_bound}; this shows that the offline counterpart, Lemma 1 from \cite{bian2019guarantees},  holds in the online setting as well.

\begin{proof}
Define $r_\tvar$ and $R_\tvar$ as in the proof of Theorem~\ref{thm:bp_regret_bound}.
From Lemma~\ref{lem:robust_greedy_sr} applied to each $h_q$, it follows that
\begin{equation*}
    \mathcal{R}_{\text{WS}}(\Tvar) \leq \sum_{q=1}^m \sum_{\tvar=1}^\Tvar \mathbb{I}(u_\tvar = q) r_\tvar = R_\Tvar.
\end{equation*}

\track{As in the proof of Theorem~\ref{thm:bp_regret_bound}, combining Theorem~4.1 of \cite{zenati22a} with the above inequality, our argument is complete.}
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{Discussion and proofs from Section~\ref{sec:separate_feedback}}
\label{sec:app_distorted}

\subsection{Remarks on \texorpdfstring{\cite{distorted_greedy}}{distorted greedy}}
\label{sec:app_error}

\begin{figure}[t]
     \centering
     \includegraphics[width=0.8\columnwidth]{figs/heatmap.pdf}
    \caption{Contour plot of (left) $F_1(\kappafk, \kappagk) = \min \left\{ 1 - \frac{\kappafk}{e}, \; 1 -\kappagk \right\} -  \frac{1}{\kappa_{q,f}} \left[1 - e^{-(1 - \kappagk) \kappa_{q,f}} \right]$ and (right) $F_2(\kappafk, \kappagk) = \min \left\{ 1 - \frac{1}{e}, \; 1 -\kappagk \right\} -  \frac{1}{\kappa_{q,f}} \left[1 - e^{-(1 - \kappagk) \kappa_{q,f}} \right]$. (Left) compares the $\alpha$ from Theorem~\ref{thm:bp_regret_bound_dist_strong} with that from Theorem~\ref{thm:bp_regret_bound}, and (right) compares $\alpha$ from Proposition~\ref{prop:bp_regret_bound_dist_weaker} with that from Theorem~\ref{thm:bp_regret_bound}.
    }
\label{fig:contour}
\end{figure}

\paragraph{Comparison of $\alpha$ for greedy vs weighted Greedy.} In Figure~\ref{fig:contour}, we compare the $\alpha$ of the greedy optimization of the BP function in \cite{jeff_bp} with the distorted greedy variant in Equation~\eqref{eq:bp_regret_strong}. In the left panel, we see that the $\alpha$ in Equation~\eqref{eq:bp_regret_strong} is everywhere greater.

\paragraph{Error in \cite{distorted_greedy} and proposed fix.}
In \cite{distorted_greedy}, at the bottom of page~188,
the authors use the inequality
\[ \sum_{e \in \text{OPT}} g_1 (e | S_t) \geq (1 - \kappa^g) (g_1(\text{OPT}) - g_1(S_t)). \]
Consider the following counterexample with $|V| = 3$ and $k = 2$ as the cardinality constraint.
Define $g(S) = |S|^2$, which is a convex-over-modular function,
so it is supermodular.
We can verify from definitions that $\kappa^g = 0.8$ 
and the modular lower bound $l_2(S) = |S|$, so that $g_1(S) = |S|^2 - |S|$. 
For simplicity, consider the case where $t=0$, so that $S_t = \emptyset$. 
Then, plugging into the inequality, we see that the LHS is $0$, whereas the RHS is $0.2 \times 2 = 0.4 > 0$ (since $g_1(\text{OPT}) = 2^2 - 2 = 2$).
Hence, this is a contradiction.
This example can be easily generalized to any convex-over-modular function, larger ground set sizes or different $t$.

We rectify this by swapping this inequality with
\[
g_1 (e|S_\tvar) \geq (1 - \kappa^{g_1}) (g_1 (\text{OPT}) - g_1 (S_\tvar)) = 0
\]
The equality above holds because $\kappa^{g_1} = 1$ by construction. Hence, the $g_1$ term disappears from the analysis. In the analysis below, we make this fix and propagate the consequences; the modified algorithm, 
analysis and result apply to the offline setting of \cite{distorted_greedy} as well.

\subsection{Proof of Separate Feedback Guarantee}
\label{sec:separate_proof}

\begin{proof}[Proof of Theorem~\ref{thm:bp_regret_bound_dist_strong}]
First, we define some notation.
\begin{align*}
    &l_{q,1}(S) = \sum_{j \in S} f_q(j| V \backslash \{j\}) \\
    &\fkone(S) = \fk(S) - l_{q,1}(S)\\
    &l_{q,2}(S) = \sum_{j \in S} g_q(j|\emptyset) \\
    &\gkone(S) = \gk(S) - l_{q,2}(S)\\ 
    &l_q(S)=l_{q,1}(S)+l_{q,2}(S)
\end{align*}
% Recall that we use $S_{q, \tvar}$ to refer to the ordered set of elements chosen for function $h_q$ until round $\tvar$ and $S_q$ to refer to the ordered final set of items chosen for function $h_q$ until round $\Tvar$.  Let the ordered set $M = \{m_1, m_2, \ldots m_{\Tvar_q}\}$ denote the set of time steps $\{\tvar \in [T]: u_\tvar = q\}$ where the $q$-th user arrived to the optimizer. Hence,  
We restrict attention to the $q$-th function $h_q$. Recall that $S_{j, q}$ refers to the first $j$ elements chosen for $h_q$.

Let the distorted objective for user $q$ when selecting the $j$-th item in the set be:
\[
\pi_{j, q} (S) = \left(1 - \frac{1}{\Tvar_q} \right)^{\Tvar_q - j} f_{q,1} (S) + g_{q, 1} (S) + l_q(S)
\]
Additionally, define
\[
\Lambda_{j, q} (x, A) = \left(1 - \frac{1}{\Tvar_q} \right)^{\Tvar_q - (j+1)} f_{q,1}(x | A) + g_{q,1}(x | A) + l_q (x)
\]

As previously, we define the instantaneous regret at round $\tvar$ as the difference between the maximum possible utility that is achievable in the round and the actual received utility. However, this time, $r_\tvar$ is defined in terms of the distorted objective. \emph{This is a key difference from the earlier arguments that is crucial to the current proof.}
\[
r_\tvar = \sup_{v \in V} \Lambda_{t_{u_t}, u_t} (v, S_{u_\tvar, \tvar-1}) -  \Lambda_{t_{u_t}, u_t} (v_\tvar, S_{u_\tvar, \tvar-1})
\]
Define the accumulated instantaneous regret until round $\tvar$ as
\[
R_\tvar = \sum_{j=1}^\tvar r_j
\]
Recognize that $R_\tvar$ is different than $\Rt$. From Lemma~\ref{prop:robust_distorted_greedy} applied to each $h_q$, it follows that
\begin{equation}
\label{eq:bp_bound_cal_by_reg_dist}
    \RT \leq \sum_{q=1}^m \sum_{\tvar=1}^\Tvar \mathbb{I}(u_\tvar = q) r_\tvar = R_\Tvar
\end{equation}

Now, we can model the problem of the present work as a contextual bandit problem in the vein of \cite{zenati22a}. Here, the context in the $\tvar$-th round is $z_\tvar = (\phi_{u_\tvar}, S_{t_{u_t}, u_t})$. Now we invoke Theorem~4.1 in \cite{zenati22a}. Thus, we have that
\[
  \mathbb{E} [R_\Tvar] \leq O\left(\sqrt{\Tvar} \left(B \sqrt{\lambda \deff} + \deff\right) \right)
\]
Combining this with Inequality~\eqref{eq:bp_bound_cal_by_reg_dist}, our argument is complete.

\end{proof}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Analysis without \ref{assump:1.1}}
\label{sec:analys-with-refass}

In certain applications, \ref{assump:1.1} on $l_{q,1}$ may not be reasonable. For these cases, we may modify the algorithm slightly, and provide an alternative bound, that is slightly weaker. Consider a modified version of Algorithm~\ref{alg:mnnucb_dist}, where line $3$
is substituted with:
\begin{equation}
\label{eq:modified_dist_alg}
    \text{Set } y_\tvar = \left(1 - 1/T_{u_\tvar} \right)^{\Tvar_{u_\tvar} - (t_{u_t}+1)} y_{f, \tvar}  + y_{g, \tvar}
\end{equation}
Recognize that this algorithm does not require \ref{assump:1.1}.

Define
\begin{equation}
  \label{eq:bp_regret_weaker}
    \mathcal{R}_{\text{BP, 3}} (\Tvar) := \sum_{q=1}^m \min \left\{ 1 - \frac{1}{e}, \; 1 -\kappagk \right\} h_q (S_q^\ast) - h_q(S_q).
\end{equation}
Observe from the right panel of Figure~\ref{fig:contour} that the $\alpha$ in the definition above is still better than that of \cite{jeff_bp} for most choices of $\kappafk, \kappagk$. Now, we can state our modified result. The proof follows similarly to Theorem~\ref{thm:bp_regret_bound_dist_strong}.
\begin{proposition}
  \label{prop:bp_regret_bound_dist_weaker}
Let Assumption~\ref{ass:rkhs} and \ref{ass:separate} and \ref{assump:1.3} hold. Additionally, let the conditions on $\epsilon_\tvar$ hold as in Theorem~\ref{thm:bp_regret_bound_dist_strong}. Then Algorithm~\ref{alg:mnnucb_dist} with the modification above yields
$
  \mathbb{E} [\mathcal{R}_{\text{BP, 3}} (\Tvar)] \leq O\left(\sqrt{\Tvar} \left(B \sqrt{\lambda \deff} + \deff\right) \right)
$
\end{proposition}

\begin{proof}[Proof of Proposition~\ref{prop:bp_regret_bound_dist_weaker}]
    For the modified version of the algorithm described in equation \eqref{eq:modified_dist_alg}, the analysis is almost identical. Repeating the analysis of Lemma~\ref{lem:distorted_rj_lemma} and Lemma~\ref{lem:dist_gain_bound}, we obtain:
\[
\fk(\Sk) + \gkone(\Sk) + l_{q,2}(\Sk) + \rmjsum \geq \left(1 - \frac{1}{e} \right) \fk (\Skstar) + l_{q,2} (\Skstar)
\]
Then, we can follow the same arguments as in Lemma~\ref{prop:robust_distorted_greedy} to conclude:
\[
    \fk(\Sk) + \gk(\Sk) \geq \min \left\{ 1 - \frac{1}{e}, \; 1 - \kappagk \right\} \; h_q (\Sk^\ast) - \rmjsum.
\]
\end{proof}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Remarks on Guess-and-double technique to replace \ref{assump:1.3}}
\label{sec:remarks-guess-double}

In this section, we provide a heuristic argument for why we expect that guess-and-double techniques should not affect the overall regret scaling in Theorem~\ref{thm:bp_regret_bound_dist_strong}.

In the traditional multi-armed bandit, when the time horizon $\Tvar$ is unknown, the proposed method of dealing with this is to start with an initial guess $\widehat{\Tvar} = 1$ and then double it each time the current time step crosses our latest guess. Any parameters in the algorithm that depend on $\Tvar$ (e.g., the step size) are set based on $\widehat{\Tvar}$ instead. This divides the entire horizon into phases, one for each guess $\widehat{\Tvar}$. Then, for each phase, the regret must be sublinear because this is equivalent to playing a shorter game with known horizon. Since the regret is the accumulation of the regrets of each phase, the overall regret must be sublinear as well.

However, in our case, the situation is more intricate because the overall regret is not expressible as the summation of regret over phases. Hence, the original style of argument does not apply. What we do, then, is keep track of the change in regret due to setting the distortion coefficient in terms of $\widehat{\Tvar_q}$ instead of $\Tvar_q$. We choose $\hatTq = \min \{ 2^j: 2^j > \tvar \}$.

When $T_q$ is known, the distortion $D_\tvar = \left(1 - \frac{1}{\Tvar_{u_\tvar}} \right)^{\Tvar_{u_\tvar} - t_{u_t} - 1}$ increases monotonically from $\left(1 - \frac{1}{\Tvar_{u_\tvar}} \right)^{\Tvar_{u_\tvar} - 1}$ to $1$ with $t_{u_t}$, i.e., as more elements are added. This monotonicity is used in the original argument to obtain the sublinear regret guarantee.

However, when the guess-and-double technique is used, the distortion is no longer monotonic in $t_{u_t}$. Within each phase, $D_\tvar$ increases from $\left(1 - \frac{1}{\widehat{\Tvar_{u_\tvar}}} \right)^{\widehat{\Tvar_{u_\tvar}} - 1}$ to $1$ but then reduces once $\widehat{\Tvar_{u_\tvar}}$ is updated at the end of the phase. It turns out that the regret actually decreases within the phase (compared to the situation where we know $T_q$) due to the increased distortion, but increases in the transitions between the phases. Below, we characterize the changes in regret in the two cases.

Define
\[
\widehat{\Lambda}_{j, q} (x, A) = \left(1 - \frac{1}{\hatTq} \right)^{\hatTq - (j+1)} f_{q,1}(x | A) + g_{q,1}(x | A) + l_q (x)
\]

Analogously, we can define
\[
\hatpi_{j, q} (S) = \left(1 - \frac{1}{\hatTq} \right)^{\hatTq - j} f_{q,1} (S) + g_{q, 1} (S) + l_q(S)
\]

\paragraph{Case 1: Within phase}

Previously, from Lemma~4 of \cite{distorted_greedy}, we had
\begin{align*}
    &\pi_{j+1, q} (\Skjnext) - \pi_{j, q} (\Skj) \\
    & = \Lambda_{j, q} (s_j, \Skj) + \frac{1}{\Tk} \left(1 - \frac{1}{\Tk} \right)^{\Tk - (j +1)} f_{q,1} (\Skj)
\end{align*}

Now, we can replace this conclusion with
\begin{align*}
    &\hatpi_{j+1, q} (\Skjnext) - \hatpi_{j, q} (\Skj) \\
    & = \widehat{\Lambda}_{j, q} (s_j, \Skj) + \frac{1}{\hatTq} \left(1 - \frac{1}{\hatTq} \right)^{\hatTq - (j +1)} f_{q,1} (\Skj) \\
    & = \widehat{\Lambda}_{j, q} (s_j, \Skj) + \frac{1}{\Tk} \left(1 - \frac{1}{\hatTq} \right)^{\hatTq - (j +1)}  f_{q,1} (\Skj) + \underbrace{\left(\frac{1}{\hatTq} - \frac{1}{\Tk} \right) \left(1 - \frac{1}{\hatTq} \right)^{\hatTq - (j +1)} f_{q,1} (\Skj)}_{N_{\text{within}, j}}
\end{align*}

The term $N_{\text{within}, j}$ is a new term. The remainder of the proof goes through as expected, while these additional terms propagate through the proof.

\paragraph{Case 2: Between phase}

% TODO: is this j a time variable, change
Note that if step $j$ is in a different phase than step $j+1$, it follows that the distortion at step $j$ is
\[
\left(1 - \frac{1}{\hatTq} \right)^{\hatTq - \hatTq} = 1.
\]
Since step $\tvar+1$ is the first time step in a phase, it follows that the guess for $\hatTq$ just doubled, and is $\hatTq = 2i$. Then, the distortion for step $j+1$ is
\[
\left(1 - \frac{1}{2i} \right)^{\tvar-1} % TODO: should this i be \tvar?
\]
As in Case~1, we can track the extra term from Lemma~4, which in this case is
\[
N_{\text{between}, j} = - \left(1 - \left(1 - \frac{1}{2i} \right)^{\tvar-1} \right) f_1 (\Skj)
\]
As before, this new term propagates through the proof.

\paragraph{Putting it together}

Accounting for the new terms, our modified final statement of Lemma~\ref{prop:robust_distorted_greedy} becomes
\[
\hk(\Sk) \geq \min \left\{ 1 - \frac{\kappafk}{e}, \; 1 - \kappagk \right\} \; h_q (\Sk^\ast) - \rmjsum + \sum_{j: \text{change}} N_{\text{between}, j} + \sum_{j: \text{no change}} N_{\text{within}, j}
\]
Above, the indices $(j: \text{change})$ include the $\log(\Tk)$ time steps that are the first time step in a phase, i.e., the first time step after our guess $\hatTq$ was updated; the indices $(j: \text{no change})$ include all other time steps. Hence, the new term
\[
N = \sum_{j: \text{change}} N_{\text{between}, j} + \sum_{j: \text{no change}} N_{\text{within}, j}
\]
gets subtracted from the regret. We observe that each of the $N_{\text{within}, j}$ terms is positive and there are many of these: $\Tk - \log(\Tk)$ to be precise. The $N_{\text{between}, j}$ terms, however, are negative and increase the regret, but there are only $\log(\Tk)$ of these. While it is difficult to quantify the terms exactly, there is no strong reason to believe that the few negative terms greatly outweigh the positive terms. From preliminary simulations, we find that the regret remains roughly the same with the doubling trick; we leave an extensive experimental investigation of this to future work.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Details on Experiments}
\label{sec:app_numerical}



\paragraph{Details for Table~\ref{tab:movie_illustrate_ground_set}, Table~\ref{tab:movie_illustrate}} The chosen toy ground set of $23$ elements is detailed in Table~\ref{tab:movie_illustrate_ground_set}. The submodular function is the facility location function; we chose this function because it is used in prior work \cite{NIPS2017_krause} for the task of movie recommendation. The supermodular part is the sum-sum-dispersion function, and the weights that capture the complementarity between movies are specified in the python notebook 
\url{code/table-1.ipynb} in the attached code.

From Table~\ref{tab:movie_illustrate}, we notice that with the submodular objective, the greedy algorithm chooses the first two movies in the Godfather series but does not choose the third. Similarly, it chooses the first Harry Potter but not the subsequent ones. In contrast, with the BP function, the greedy algorithm chooses all elements from the series in both cases. This behavior cannot be encoded using solely a submodular function, but it is very easy to do so with a BP function.

\paragraph{Setup for movie recommendation in Figure~\ref{fig:movie_performance}}


From MovieLens and using the matrix-completion approach in
\cite{cai_svt}, we obtain a ratings matrix
$M \in \mathbb{R}^{900 \times 1600}$, where $M_{i,j}$ is the rating of
the $i^\text{th}$ user for the $j^\text{th}$ movie; for density of data, we
consider the most active users and most popular movies.

%Using this dataset, we setup an interactive BP maximization problem, as formulated in Vignette~\ref{vignette:movie}.
We cluster the users into $m = 10$ groups using the $k$-means algorithm and design a BP objective for each user-group. The objective for the $q^\text{th}$ group is decomposed as $h_q(A) = \sum_{v \in A} m_q(v) + \lambda_1 f_q(A) + \lambda_2 g_q(A)$, where the modular part $m_q(v)$ is the average rating for movie $v$ amongst all users in group $q$.

Let the set $L$ refer to the collection of all genres in the ground set. The concave-over-modular submodular part encourages the recommender to maintain a balance across genres in chosen suggestions:
$
f_q (A) = \sum_{g \in L} \sqrt{1 + u_{q,g} (A)}.
$
The set $L$ is the collection of all genres. We now specify what $u_{q,g} (\cdot)$ is. For each element $v \in V$, define a vector $r(v) \in \{0,1\}^{|L|}$. Here, each entry corresponds to a genre and is $1$ if the genre is associated with the movie $v$. Then let $N_v = r(v)^\top \mathbf{1}$ denote the number of genres for movie $v$. In $f_q(\cdot)$, we specify
\[
u_{q,g}  (A) =  \sum_{v \in A}  \mathbf{1}(m_q (v) > \tau) \frac{\mathbf{1}(\text{$v$ has genre $g$})}{N_v}
\]
Above, $\mathbf{1}$ is the indicator function.


The supermodular function, in contrast, is designed to encourage the optimizer to exploit complementarities within genres:
$
g_q (A) = \sum_{g \in L} \left(1 + \tilde{u}_{q,g} (A) \right)^2,
$
where we define
\[
\Tilde{u}_{q,g}  (A) =  \sum_{v \in A}  \mathbf{1}(m_q (v) > \tau) \frac{\mathbf{1}(\text{$v$ has genre $g$}) \, m_q (v)}{N_v}
\]
We want the complementarities to be amplified when the movies have higher ratings, so notice that each term in $\Tilde{u}_{q,g}$ is scaled by $m_q (v)$ relative to each term of $u_{q,g}$.
The constants $\lambda_1, \lambda_2$ were chosen such that the supermodular part slightly dominates the submodular part, since previous works already study functions that are primarily submodular. The code is contained in notebook ``Figure 2.''

\paragraph{Kernel Estimation for Figure~\ref{fig:movie_performance}}

For Algorithm~\ref{alg:smucb}, we choose the RBF kernel for movies, the linear kernel for users and the Jaccard kernel for a history of recommendations. The composite kernel is $\kernel((u, v, A), (u', v', A')) = \kappa_1 \kernel_{\text{user}}(u, u') + \kappa_2 \kernel_{\text{movie}}(v, v') + \kappa_3 \kernel_{\text{history}}(A, A')$ for $\kappa_1, \kappa_2, \kappa_3 > 0$. For Algorithm~\ref{alg:mnnucb_dist}, we choose the RBF kernel for $o_\tvar$.


\paragraph{Active Learning.} This corresponds to Vignette~\ref{vignette:active_learning} with $m=1$ tasks.
We apply the  Naive-Bayes formulation of active learning in Equation~(5) of \cite{wei_active_learning} and set the submodular part as $f(A) = f^{\text{NB}}(A)$.
The supermodular part is the sum-sum-dispersion function as above $g (A) = \sum_{v_\tvar \in A} \sum_{v_j \in A: v_j \neq v_\tvar} B_{\tvar, j}$.
Here $B_{\tvar,j} = 0$ if $(v_\tvar, v_j)$ are from the same class, and $B_{\tvar,j} = 1/\text{dist}(v_\tvar, v_j)$ if $(v_\tvar, v_j)$ are from opposite classes; this encourages the selection of proximal points from different classes.

Here, we elaborate on the choice of submodular function. Assume our features are discrete: each point $v \in V$ has features $x_v \in \mathcal{X}$ (where $\mathcal{X}$ is some finite set) and binary label $y_v \in \{0,1\}$, denoted by the orange and blue colors in Figure~\ref{fig:active_learn}. Then, for any $(x \in \mathcal{X}, y \in \{0,1\})$ and for any subset of training points $S \subseteq V$, we can define
\[
m_{x, y} (S) = \sum_{v \in S} \mathbf{1} (x_v = x \land y_v = y)
\]
as the empirical count of the joint occurrence of $(x, y)$ in $S$. Then, inspired by the construction in \citeauthor{wei_active_learning}, we define the submodular part $f$ as
\[
f(S) = \sum_{x \in \mathcal{X}} \sum_{y \in \{0, 1\}} \sqrt{m_{x, y} (V) }\log(m_{x, y} (S))
\]
To obtain the finite set $\mathcal{X}$, we discretize our $2$-dimensional features into $56$ boxes. The square-root in the expression above does not occur in the original paper and was introduced by us due to better empirical performance. The intuition for constructing $f(\cdot)$ in this way is that the feature $x$ should appear alongside label $y$ in the chosen subset with roughly the same frequency as in the ground training set.
\end{document}