\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{amsmath}
\usepackage{amsmath,amssymb,amsthm}
% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 
\externaldocument{zuo_713}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Regularized Online DR-Submodular Optimization\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{Pengyu Zuo}
\author[1]{Yao Wang}
\author[2]{Shaojie Tang}

% Add affiliations after the authors
\affil[1]{%
    Xi'an Jiaotong University\\
    Xi'an, China
}
\affil[2]{%
    The University of Texas at Dallas\\
    Richardson, USA
}
\affil[ ]{zpyqwq@gmail.com, yao.s.wang@gmail.com, shaojie.tang@utdallas.edu}
  
  \begin{document}
  
\onecolumn %% Turn this off if single column is desired for the supplement
\maketitle
\appendix

\section{Proof of Lemma 2}
\begin{proof}
The proof of Lemma 2 can be derived from \citep{zhang2022stochastic}. For the readers' convenience, we also give a proof here. First, we have an inequality about $\langle\mathbf{x},\nabla F(\mathbf{x})\rangle$, that is, 
\begin{equation}
\begin{aligned}
\langle\mathbf{x}, \nabla F(\mathbf{x})\rangle & =\int_0^1 e^{z-1}\langle\mathbf{x}, \nabla f(z * \mathbf{x})\rangle \mathrm{d} z \\
& =\int_0^1 e^{z-1} \mathrm{d} f(z * \mathbf{x}) \\
& =\left. e^{z-1}f(z * \mathbf{x})\right|_{z=0} ^{z=1}-\int_0^1 f(z * \mathbf{x}) e^{z-1} \mathrm{d} z \\
& \leq f(\mathbf{x})-\int_0^1 f(z * \mathbf{x})e^{z-1} \mathrm{d} z .
\end{aligned}
\end{equation}
Second, we also have an inequality about  $\langle\mathbf{y},\nabla F(\mathbf{x})\rangle$, that is, 
\begin{equation}
\begin{aligned}
\langle\mathbf{y}, \nabla F(\mathbf{x})\rangle & =\int_0^1 e^{z-1}\langle\mathbf{y}, \nabla f(z * \mathbf{x})\rangle \mathrm{d} z \\
& \geq \int_0^1 e^{z-1}\langle\mathbf{y} \vee(z * \mathbf{x})-z * \mathbf{x}, \nabla f(z * \mathbf{x})\rangle \mathrm{d} z \\
& \geq  \int_0^1 e^{z-1}(f(\mathbf{y} \vee(z * \mathbf{x}))-f(z * \mathbf{x})) \mathrm{d} z \\
& \geq (1-\frac{1}{e}) f(\mathbf{y})-\int_0^1  f(z * \mathbf{x}) e^{z-1} \mathrm{d} z
\end{aligned}
\end{equation}
where the first inequality holds because $\mathbf{y} \geq \mathbf{y} \vee(z * \mathbf{x})-z * \mathbf{x} \geq \mathbf{0} \text { and } \nabla f(z * \mathbf{x}) \geq \mathbf{0}$; the second one comes from the property that DR-submodular function is concave along any non-negative and non-positive direction \cite{bian2017continuous}; the final one comes from $f(\mathbf{y} \vee(z * \mathbf{x})) \geq f(\mathbf{y})$.


Finally, putting the inequality (1) and inequality (2) together, we have
$$
\begin{aligned}
\langle\mathbf{y}-\mathbf{x}, \nabla F(\mathbf{x})\rangle &\geq (1-\frac{1}{e}) f(\mathbf{y})-\int_0^1  f(z * \mathbf{x}) e^{z-1} \mathrm{d} z - \left(f(\mathbf{x})-\int_0^1 f(z * \mathbf{x})e^{z-1} \mathrm{d} z\right)\\
&\ge (1-\frac{1}{e}) f(\mathbf{y}) - f(\mathbf{x}).
\end{aligned} 
$$
\end{proof}
\section{Proof of Theorem 1}
\begin{proof}
Let $\tau$ be the stopping time of Algorithm 1, i.e. when $B_{\tau}<1$. We will complete the proof in three steps.

\textbf{Step 1:} We will bound the regret of $\mathcal{L}_{t}^{P}$ up to $\tau$.

\noindent Let $\mathbf{x}^{P}_{*}=\mathop{\arg\sup}\limits_{\mathbf{x}\in\mathcal{P}}\sum_{t=1}^{\tau}\mathcal{L}_{t}^{P}(\mathbf{x})$. We define $\nabla_t = \nabla \mathcal{L}^{P}_{t}(\mathbf{x}_t)$, and $\tilde{\nabla}_t=\nabla \tilde{\mathcal{L}_t^{P}}(\mathbf{x}_t)=\nabla\left(F(\mathbf{x}_t)+r(\mathbf{x}_t)-\langle\lambda_t,c_t(\mathbf{x}_t)\rangle\right)$. By the definition of $\mathbf{x}_{t+1}$ and properties of the projection operator for a convex set, we have
$$
\begin{aligned}
\left\|\mathbf{x}_{t+1}-\mathbf{x}^{P}_{*}\right\|^2&=\left\|\Pi_{\mathcal{P}}\left(\mathbf{x}_t+\eta_t \tilde{\nabla}_t\right)-\mathbf{x}^{P}_{*}\right\|^2 \leq\left\|\mathbf{x}_t+\eta_t \tilde{\nabla}_t-\mathbf{x}^{P}_{*}\right\|^2 \\
&\leq\left\|\mathbf{x}_t-\mathbf{x}^{P}_{*}\right\|^2+\eta_t^2\left\|\tilde{\nabla}_t\right\|^2-2 \eta_t \tilde{\nabla}_t^{\top}\left(\mathbf{x}^{P}_{*}-\mathbf{x}_t\right).
\end{aligned}
$$
Therefore we further have
$$
\begin{aligned}
\tilde{\nabla}_t^{\top}\left(\mathbf{x}^{P}_{*}-\mathbf{x}_t\right) & \leq \frac{\left\|\mathbf{x}_t-\mathbf{x}^{P}_{*}\right\|^2-\left\|\mathbf{x}_{t+1}-\mathbf{x}^{P}_{*}\right\|^2+\eta_t^2\left\|\tilde{\nabla}_t\right\|^2}{2 \eta_t} \\
& \leq \frac{\left\|\mathbf{x}_t-\mathbf{x}^{P}_{*}\right\|^2-\left\|\mathbf{x}_{t+1}-\mathbf{x}^{P}_{*}\right\|^2}{2 \eta_t}+\frac{\eta_t G^2}{2}, 
\end{aligned}
$$
where $G=\mathrm{sup}_{t}\left\|\tilde{\nabla}_t\right\|$.

If we define $\frac{1}{\eta_0}\triangleq 0$ and in light of Lemma 2, it can be deduced that
\begin{align*}
&\sum_{t=1}^\tau (1-\frac{1}{e})f_t(\mathbf{x}^{P}_{*})+r\left( \mathbf{x}^{P}_{*}\right)-\lambda_{t}c_t(\mathbf{x}^{P}_{*})-f_t(\mathbf{x}_t)-r\left( \mathbf{x}_t\right)+\lambda_{t}c_t(\mathbf{x}_t)\\
&\le \sum_{t=1}^{\tau}\langle \nabla F(\mathbf{x}_t),\mathbf{x}^{P}_{*}-\mathbf{x}_t\rangle + \langle \nabla (r(\mathbf{x}_t)-\lambda_t c_{t}(\mathbf{x}_t)),\mathbf{x}^{P}_{*}-\mathbf{x}_t\rangle\\
&=\sum_{t=1}^{\tau}\langle \tilde{\nabla}_t,\mathbf{x}^{P}_{*}-\mathbf{x}_t\rangle\\
&\le\sum_{t=1}^{\tau}\frac{\left\|\mathbf{x}_t-\mathbf{x}^{P}_{*}\right\|^2-\left\|\mathbf{x}_{t+1}-\mathbf{x}^{P}_{*}\right\|^2}{2\eta_t}+\frac{G^2}{2}\sum_{t=1}^{\tau}\eta_t\\
&\le \frac{1}{2}(\sum_{t=1}^{\tau}\left\|\mathbf{x}_t-\mathbf{x}^{P}_{*}\right\|^2(\frac{1}{\eta_t}-\frac{1}{\eta_{t-1}}))+\frac{G^2}{2}\sum_{t=1}^{\tau}\eta_t\\
&\le \frac{D^2}{2\eta_\tau}+\frac{G^2}{2}\sum_{t=1}^{\tau}\eta_t\\
&\le O(\sqrt{\tau}), 
\end{align*}
where $D = \mathrm{sup}_{\mathbf{x},\mathbf{y}\in\mathcal{P}}\|\mathbf{x}-\mathbf{y}\|$.

\textbf{Step 2:} We will bound the regret of $\mathcal{L}_{t}^{D}$ up to $\tau$. 

Because $\mathcal{L}_{t}^{D}$ is a linear function, using the online gradient descent, we have $\mathop{\sup}\limits_{\lambda\in\mathcal{D}}\sum_{t=1}^{\tau}(\mathcal{L}^{D}_{t}(\lambda)-\mathcal{L}^{D}_{t}(\lambda_t))\le O(\sqrt{\tau})$ for any $\lambda$.

\textbf{Step 3:} Using the results of Steps 1 and 2, we can complete the proof.

\noindent
From Step 1, we have
$$
\mathop{\sup}\limits_{x\in\mathcal{P}}\sum_{t=1}^{\tau}((1-\frac{1}{e}) f_t(\mathbf{x})+r(\mathbf{x})-\langle\lambda_t,c_t(\mathbf{x})\rangle-f_t(\mathbf{x}_t)-r(\mathbf{x}_t)+\langle\lambda_t,c_t(\mathbf{x}_t)\rangle)\le O(\sqrt{\tau}).
$$
Then, by rearranging,
$$
\sum_{t=1}^{\tau}f_{t}(\mathbf{x}_t)+r(\mathbf{x}_t)\ge \mathop{\sup}\limits_{x\in\mathcal{P}}\sum_{t=1}^{\tau}((1-\frac{1}{e}) f_t(\mathbf{x})+r(\mathbf{x})-
\langle\lambda_t,c_t(\mathbf{x})\rangle+\langle\lambda_t,c_t(\mathbf{x}_t)\rangle)-O(\sqrt{\tau}).
$$
From  Step 2, $\forall \lambda$ we have $\sum_{t=1}^{\tau}(\mathcal{L}^{D}_{t}(\lambda)-\mathcal{L}^{D}_{t}(\lambda_t))\le O(\sqrt{\tau})$. Then, by the definition of $\mathcal{L}^{D}_{t}$,
$$
\sum_{t=1}^{\tau}\langle\lambda_t,c_t(\mathbf{x}_t)\rangle\ge\sum_{t=1}^{\tau}(\langle\lambda_t,\rho\rangle-\langle\lambda,\rho\rangle+\langle\lambda,c_t(\mathbf{x}_t)\rangle)-O(\sqrt{\tau}).
$$
Therefore,

\begin{equation}
\sum_{t=1}^{\tau}f_{t}(\mathbf{x}_t)+r(\mathbf{x}_t)\ge-O(\sqrt{\tau})+\mathop{\sup}\limits_{x\in\mathcal{P}}\sum_{t=1}^{\tau}((1-\frac{1}{e} )f_t(\mathbf{x})+r(\mathbf{x})+\langle\lambda_t,\rho-c_t(\mathbf{x})\rangle-\langle\lambda,\rho-c_t(\mathbf{x}_t)\rangle).
\end{equation}

Next, we provide a lower bound on the following term.
$$
\text{\textcircled{A}}=\mathop{\sup}\limits_{x\in\mathcal{P}}\sum_{t=1}^{\tau}((1-\frac{1}{e} )f_t(\mathbf{x})+r(\mathbf{x})+\langle\lambda_t,\rho-c_t(\mathbf{x})\rangle).
$$
Let $\mathrm{APO}^{*}_{\tau}=\mathop{\sup}_{x\in\mathcal{P}}\sum_{t=1}^{\tau}\left((1-\frac{1}{e} ) f_t(\mathbf{x})+r(\mathbf{x})\right)$ and $\mathbf{x}^{*}=\mathop{\arg\sup
}_{x\in\mathcal{P}}\sum_{t=1}^{\tau}\left((1-\frac{1}{e} )f_t(\mathbf{x})+r(\mathbf{x})\right)$. $\mathrm{APO}^{*}_{\tau}$ represents the $(1-\frac{1}{e},1)$ approximate optimal value without constraints. We shall show that

\begin{equation}
\text{\textcircled{A}}\ge \rho \mathrm{APO}^{*}_{\tau}.    
\end{equation}

To do so, we consider two cases. First, if $\sum_{t=1}^{\tau}(1-\frac{1}{e} )f_{t}(\mathbf{x}^{*})\ge\sum_{t=1}^{\tau}\langle\lambda_t,c_{t}(\mathbf{x}^{*})\rangle$, then the value of the function for $\mathbf{x}^{*}$ is at least
$$
\begin{aligned}
\text{\textcircled{A}}&=\mathop{\sup}\limits_{x\in\mathcal{P}}\sum_{t=1}^{\tau}((1-\frac{1}{e} )f_t(\mathbf{x})+r(\mathbf{x})+\langle\lambda_t,\rho-c_t(\mathbf{x})\rangle)\\
&\ge \sum_{t=1}^{\tau}((1-\frac{1}{e} )f_t(\mathbf{x}^{*})+r(\mathbf{x}^{*})+\langle\lambda_t,\rho-c_t(\mathbf{x}^{*})\rangle)\\
&\ge \sum_{t=1}^{\tau}((1-\frac{1}{e} )f_t(\mathbf{x}^{*})+r(\mathbf{x}^{*})+\langle\lambda_t,\rho\cdot c_t(\mathbf{x}^{*})-c_t(\mathbf{x}^{*})\rangle)\\
&\ge \sum_{t=1}^{\tau}(1-\frac{1}{e} )f_t(\mathbf{x}^{*})+r(\mathbf{x}^{*})-(1-\rho)\sum_{t=1}^{\tau}\langle\lambda_t,c_t(\mathbf{x}^{*})\rangle\\
&\ge \rho\sum_{t=1}^{\tau}(1-\frac{1}{e} )f_t(\mathbf{x}^{*})+r(\mathbf{x}^{*})=\rho\mathrm{APO}^{*}_{\tau},
\end{aligned}
$$
where the second inequality holds since $c_t(\cdot)\in[0,1]$, for each $t\in[T]$. Otherwise, if $\sum_{t=1}^{\tau}(1-\frac{1}{e} )f_{t}(\mathbf{x}^{*})<\sum_{t=1}^{\tau}\langle\lambda_t,c_{t}(\mathbf{x}^{*})\rangle$, we have that
$$
\begin{aligned}
\text{\textcircled{A}}&\ge\sum_{t=1}^{\tau}\langle\lambda_{t},\rho\rangle\ge\sum_{t=1}^{\tau}\langle\lambda_{t},\rho\cdot c_t(\mathbf{x}^{*})\rangle\\
&\ge  \rho\sum_{t=1}^{\tau}(1-\frac{1}{e} )f_t(\mathbf{x}^{*})+r(\mathbf{x}^{*})=\rho\mathrm{APO}^{*}_{\tau}.    
\end{aligned}
$$
Combining inequality (3) and inequality (4), we get
$$
\sum_{t=1}^{\tau}(f_t(\mathbf{x}_t)+r(\mathbf{x}_t)-\langle\lambda,c_t(\mathbf{x}_t)\rangle)\ge-O(\sqrt{\tau})+\rho\mathrm{APO}^{*}_{\tau}-\tau\langle\lambda,\rho\rangle.
$$
In particular, we have
$$
\rho\mathrm{APO}^{*}_{\tau}\ge\rho\mathrm{APO}_{\tau}\ge\rho(\mathrm{APO}_{\tau}-T+\tau),
$$
where, $\mathrm{APO}_{\tau}$ is the $(1-\frac{1}{e},1)$ approximate optimal reward with constraints. By definition,  $\mathrm{REW}=\sum_{t=1}^{\tau}f_t(\mathbf{x}_t)+r(\mathbf{x}_t)$. Then,
$$
\begin{aligned}
\mathrm{REW}=\sum_{t=1}^{\tau}f_t(\mathbf{x}_t)+r(\mathbf{x}_t)&\ge -O(\sqrt{\tau})+\rho\mathrm{APO}^{*}_{\tau}-\sum_{t=1}^{\tau}\langle\lambda,\rho-c_t(\mathbf{x}_t)\rangle\\
&\ge -O(\sqrt{\tau})+\rho(\mathrm{APO}_{\tau}-T+\tau)-\sum_{t=1}^{\tau}\langle\lambda,\rho-c_t(\mathbf{x}_t)\rangle.
\end{aligned}
$$
If $\tau=T$, in order to get the result,  it is enough to set $\lambda=0$, and to substitute the above expression in the definition of regret. Otherwise, $\tau<T$, which means that 
$$
\sum_{t=1}^{\tau}c_t(\mathbf{x}_t)+1\ge\rho T,
$$
where, in our setting, the largest possible cost is $1$. Then, we set $\lambda=1/\rho$ and thus, 
$$
\sum_{t=1}^{\tau}\langle\lambda,\rho-c_t(\mathbf{x}_t)\rangle=1/\rho\sum_{t=1}^{\tau}(\rho-c_t(\mathbf{x}_t))\le \tau-T+1/\rho.
$$
Then, by substituting the above expression
$$
\mathrm{REW}\ge -O(\sqrt{\tau})+\rho(\mathrm{APO}_{\tau}-T+\tau)-(\tau-T)-1/\rho.
$$
Finally, we have
$$
\begin{aligned}
\rho(1-\frac{1}{e},1)\mathrm{OPT}-\mathrm{REW}\le\rho\mathrm{APO}_{\tau}-\mathrm{REW}&\le  O(\sqrt{\tau})+(T-\tau)(\rho-1)+1/\rho\\
&\le  O(\sqrt{\tau})+1/\rho = O(\sqrt{T}).
\end{aligned}
$$
\end{proof}
\section{Proof of Proposition 1 }
\begin{proof}
 We use induction to prove this proposition. For the case when $n=2$, let $\mathbf{\mathbf{x}_1}\le\mathbf{x_2}, \mathbf{z}=\lambda \mathbf{\mathbf{x}_1}+(1-\lambda)\mathbf{x_2}$,  we have $\mathbf{\mathbf{x}_1}-\mathbf{z}\le\mathbf{0},$ and $\mathbf{x_2}-\mathbf{z}\ge\mathbf{0}$. Using the property that DR-submodular function is concave along any non-negative and non-positive direction~\cite{bian2017continuous}, we get
$$
\begin{aligned}
    f(\mathbf{\mathbf{x}_1})\le f(\mathbf{z}) + \nabla f(\mathbf{z})(\mathbf{\mathbf{x}_1}-\mathbf{z}),\\
    f(\mathbf{x_2})\le f(\mathbf{z}) + \nabla f(\mathbf{z})(\mathbf{x_2}-\mathbf{z}).
\end{aligned}
$$
Multiplying the first inequality by $\lambda$, the second inequality by $1-\lambda$, and then adding the two inequalities together, we get the result for $n=2$.

\noindent To show that this is true for all natural numbers, we proceed by induction. Assume the proposition is true for some $n$ and then,
$$
\begin{aligned}
    f(\sum_{i=1}^{n+1}\lambda_i \mathbf{x_i})&=f(\lambda_1 \mathbf{\mathbf{x}_1}+\sum_{i=2}^{n+1}\lambda_i \mathbf{x_i})=f(\lambda_1 \mathbf{\mathbf{x}_1}+(1-\lambda_1)\frac{1}{1-\lambda_1}\sum_{i=2}^{n+1}\lambda_i \mathbf{x_i}) \\
\end{aligned}
$$
Because $\mathbf{\mathbf{x}_1}\le\mathbf{x_i}$ for all $i=2,\dots,n+1$ and $\sum_{i=2}^{n+1}\frac{\lambda_i}{1-\lambda_1}=1$, we get $\mathbf{\mathbf{x}_1}\le\frac{1}{1-\lambda_1}\sum_{i=2}^{n+1}\lambda_i \mathbf{x_i}$. Finally, we have
$$
\begin{aligned}
 f(\sum_{i=1}^{n+1}\lambda_i \mathbf{x_i})&\ge \lambda_1 f(\mathbf{\mathbf{x}_1})+(1-\lambda_1)f(\frac{1}{1-\lambda_1}\sum_{i=2}^{n+1}\lambda_i \mathbf{x_i})\\
&=\lambda_1 f(\mathbf{\mathbf{x}_1})+(1-\lambda_1)f(\sum_{i=2}^{n+1}\frac{\lambda_i}{1-\lambda_1} \mathbf{x_i})\\
&\ge \lambda_1 f(\mathbf{\mathbf{x}_1}) +(1-\lambda_1)\sum_{i=2}^{n+1}\frac{\lambda_i}{1-\lambda_1}f(\mathbf{x_i})\\
&=\sum_{i=1}^{n+1}\lambda_i f(\mathbf{x_i}).
\end{aligned}
$$
\end{proof}
\section{Proof of Theorem 2}
\begin{proof}
Let $\tau$ be the stopping time of Algorithm 2, i.e. when $B_{\tau}<1$. First, we will bound the regret up to $\tau$. Let $x^{*}$ be the best fixed action for Problem (2) defined in the main paper. Because $c_t(\mathbf{x}_t)\le1$, we have $\tau\ge\rho T$.
 Using the $L$-smoothness of $f$ and $r$ and the update rule of Algorithm 2, we have
$$
\begin{aligned}
f(\mathbf{x}_{t+1})+r(\mathbf{x}_{t+1})&\overset{(a)}{\ge} f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}\langle \mathbf{v}_t, \nabla f(\mathbf{x}_t)+\nabla r(\mathbf{x}_t)\rangle-\frac{L}{2T^{2}}\|\mathbf{v}_t\|^2\\
&\ge f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}\langle \mathbf{v}_t, \mathbf{d}_t+\nabla r(\mathbf{x}_t)\rangle+\frac{1}{T}\langle \mathbf{v}_t, \nabla f(\mathbf{x}_t)-\mathbf{d}_t\rangle-\frac{LD^2}{2T^2}\\
&\overset{(b)}{\ge} f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}\langle \mathbf{x}^{*}, \mathbf{d}_t+\nabla r(\mathbf{x}_t)\rangle+\frac{1}{T}\langle \mathbf{v}_t, \nabla f(\mathbf{x}_t)-\mathbf{d}_t\rangle-\frac{LD^2}{2T^2}\\
&= f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}\langle \mathbf{x}^{*}, \nabla f(\mathbf{x}_t)+\nabla r(\mathbf{x}_t)\rangle+\frac{1}{T}\langle \mathbf{v}_t-\mathbf{x}^{*}, \nabla f(\mathbf{x}_t)-\mathbf{d}_t\rangle\\&-\frac{LD^2}{2T^2}\\
&\overset{(c)}{\ge} f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}\langle (\mathbf{x}^{*}-\mathbf{x}_t)\vee 0, \nabla f(\mathbf{x}_t)+\nabla r(\mathbf{x}_t)\rangle\\&+\frac{1}{T}\langle \mathbf{v}_t-\mathbf{x}^{*}, \nabla f(\mathbf{x}_t)-\mathbf{d}_t\rangle-\frac{LD^2}{2T^2}\\
&\overset{(d)}{\ge} f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}(f(\mathbf{x}^{*}\vee \mathbf{x}_t)+r(\mathbf{x}^{*}\vee \mathbf{x}_t))-\frac{1}{T}(f(\mathbf{x}_t)+r(\mathbf{x}_t))+\\&\frac{1}{T}\langle \mathbf{v}_t-\mathbf{x}^{*}, \nabla f(\mathbf{x}_t)-\mathbf{d}_t\rangle-\frac{LD^2}{2T^2}\\
&\overset{(e)}{\ge} f(\mathbf{x}_t)+r(\mathbf{x}_t)+\frac{1}{T}(f(\mathbf{x}^{*})+r(\mathbf{x}^{*}))-\frac{1}{T}(f(\mathbf{x}_t)+r(\mathbf{x}_t))+\\&\frac{1}{T}\langle \mathbf{v}_t-\mathbf{x}^{*}, \nabla f(\mathbf{x}_t)-\mathbf{d}_t\rangle-\frac{LD^2}{2T^2}\\
\end{aligned}
$$
where $D = \mathrm{sup}_{\mathbf{x},\mathbf{y}\in\mathcal{P}}\|\mathbf{x}-\mathbf{y}\|$. The inequality $(a)$ comes from the $L$-smoothness of $f$ and $r$, inequality $(b)$ holds because of the update rule of Algorithm 2, $(c)$ and $(e)$ are due to the monotonicity of $f$, and $(d)$  comes from the property that a DR-submodular function is concave along any non-negative and non-positive direction. Defining $\mathbf{\epsilon}_t :=\mathbf{d}_t-\nabla f(\mathbf{x}_t)$ and rearranging the terms in the above inequality, we have
$$
f(\mathbf{x}^{*})+r(\mathbf{x}^{*})-f(\mathbf{x}_{t+1})-r(\mathbf{x}_{t+1})\le (1-\frac{1}{T})(f(\mathbf{x}^{*})+r(\mathbf{x}^{*})-f(\mathbf{x}_t)-r(\mathbf{x}_t))+\frac{D}{T}\|\mathbf{\epsilon}_t\|+\frac{LD^2}{2T^2}.
$$
Applying the above inequality recursively, we further have
$$
f(\mathbf{x}^{*})+r(\mathbf{x}^{*})-f(\mathbf{x}_{t+1})-r(\mathbf{x}_{t+1})\le (1-\frac{1}{T})^t(f(\mathbf{x}^{*})+r(\mathbf{x}^{*})-f(\mathbf{x}_1)-r(\mathbf{x}_1))+\frac{D}{T}\sum_{s=1}^{t}\|\epsilon_{s}\|+\frac{LD^2}{2T}.
$$
Using the above inequality and the fact that $\sum^{\tau-1}_{t=1}(1-\frac{1}{T})^t\le\sum_{t=1}^{\tau-1}e^{-t/T}\le \tau(e^{-1/T}-1/e^{\rho})\le \tau(1-1/e^{\rho})$, we get that the $(\frac{1}{e^{\rho}},\frac{1}{e^{\rho}})$-$\mathcal{SR}_{\tau}$ is bounded by $\frac{\rho LD^2}{2}+\frac{D}{T}\sum_{t=1}^{\tau-1}\sum_{s=1}^{t}\|\epsilon_s\|$. Thus, we get
$$
\begin{aligned}
\mathbf{\epsilon}_t &= \mathbf{d}_t-\nabla f(\mathbf{x}_t)\\
&=(1-\eta_t)\epsilon_{t-1}+\eta_t(\nabla f_t(\mathbf{x}_t)-\nabla f(\mathbf{x}_t))\\&
+(1-\eta_t)(\nabla f_t(\mathbf{x}_t)-\nabla f_t(\mathbf{x}_{t-1})-(\nabla f(\mathbf{x}_t)-\nabla f(\mathbf{x}_{t-1}))).    
\end{aligned}
$$
Applying the above equality recursively, we obtain
$$
\begin{aligned}
\mathbf{\epsilon}_t &=\prod_{s=2}^{t}(1-\eta_s)\epsilon_1 + \sum_{m=2}^{t}\prod_{s=m}^{t}(1-\eta_s)(\nabla f_{m}(\mathbf{x}_{m})-\nabla f_{m}(\mathbf{x}_{m-1})-(\nabla f(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m-1})))\\
&+\sum_{m=2}^{t}\eta_m\prod_{s=m+1}^{t}(1-\eta_s)(\nabla f_{m}(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m})).   
\end{aligned}
$$
Let $\epsilon_t=\sum_{m=1}^{t}\xi_{t,m}$, where $\xi_{t, 1}=\prod_{s=2}^{t}(1-\eta_s)\epsilon_1$ and $\xi_{t, m}=\prod_{s=m}^{t}(1-\eta_s)(\nabla f_{m}(\mathbf{x}_{m})-\nabla f_{m}(\mathbf{x}_{m-1})-(\nabla f(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m-1})))+\eta_m\prod_{s=m+1}^{t}(1-\eta_s)(\nabla f_{m}(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m}))$ for $m>1$. Let $\mathcal{F}_{m}$ be the $\sigma$-field generated by $\{f_s\}_{s=1}^{m-1}$. Clearly, $\mathbb{E}[\xi_{t,1}]=0$. Also, for $m>1$, we have
$$
\begin{aligned}
    \mathbb{E}[\xi_{t,m}|\mathcal{F}_{m}]&=\prod_{s=m}^{t}(1-\eta_s)(\nabla f(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m-1})-(\nabla f(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m-1})))\\&+\eta_m\prod_{s=m+1}^{t}(1-\eta_s)(\nabla f(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m}))\\
    &=0.
\end{aligned}
$$
Therefore, for all $t\in [\tau]$, $\{\xi_{t,m}\}_{m=1}^{t}$ is a martingale difference sequence. For any $m\in[t]$, we can write
$$
\prod_{s=m}^{t}(1-\eta_s)=\prod_{s=m}^{t}(1-\frac{1}{s+1})=\prod_{s=m}^{t}(\frac{s}{s+1})=\frac{m}{t+1}.
$$
Thus we have $\|\xi_{t,1}\|=\frac{2}{t+1}\|\nabla f_1(\mathbf{x}_1)-\nabla f(\mathbf{x}_1)\|\le\frac{2\sigma}{t+1}$. For $m>1$, we have
$$
\begin{aligned}
\|\xi_{t,m}\|\le&\prod_{s=m}^{t}(1-\eta_s)(\|\nabla f_{m}(\mathbf{x}_{m})-\nabla f_{m}(\mathbf{x}_{m-1})\|+\|\nabla f(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m-1})\|)\\&+\eta_m\prod_{s=m+1}^{t}(1-\eta_s)\|\nabla f_{m}(\mathbf{x}_{m})-\nabla f(\mathbf{x}_{m})\|\\
\le& \frac{2Lm}{t+1}\|\mathbf{x}_{m}-\mathbf{x}_{m-1}\|+\frac{\sigma}{t+1}\\
\le&\frac{2LDm/\tau+\sigma}{t+1}\\
\le&\frac{2LD+\sigma}{t+1}.
\end{aligned}
$$
Using the concentration inequality for vector-valued martingales, we have
$$\begin{aligned}
\mathbb{P}(\|\mathbf{\epsilon}_t\|\ge \lambda_t)&\le 2\mathrm{exp}\left(-\frac{\lambda_t^{2}}{(\frac{2\sigma}{t+1})^2+(t-1)(\frac{2LD+\sigma}{t+1})^2}\right)\\
&\le 2 \mathrm{exp}\left(-\frac{\lambda_t^{2}(t+1)}{(2LD+2\sigma)^2}\right).
\end{aligned}
$$
Therefore, we can get the expected regret bound of the algorithm:
$$
\begin{aligned}
    \mathbb{E}\|\mathbf{\epsilon}_t\|&=\int_{\lambda=0}^{\infty}\mathbb{P}(\|\mathbf{\epsilon}_t\|\ge\lambda)d\lambda\\
    &\le  \int_{\lambda=0}^{\infty}2 \mathrm{exp}\left(-\frac{\lambda^{2}(t+1)}{(2LD+2\sigma)^2}\right)d\lambda\\
    &=\int_{x=0}^{\infty}2 \mathrm{exp}(-x^{2})\frac{2LD+2\sigma}{\sqrt{t+1}}dx\\
    &=\frac{2\sqrt{\pi}(LD+\sigma)}{\sqrt{t+1}}.
\end{aligned}
$$
As a consequence, the expected regret bound is $O(\sqrt{\tau})$.

Now, we get the regret up to $\tau$. Let $\mathrm{REW}_{\tau}$ be the reward that we get, and $\mathrm{OPT}_{\tau}$ be the optimal reward till $\tau$. So we have:
$$
\mathbb{E}(\frac{1}{e^{\rho}}\mathrm{OPT}_{\tau}-\mathrm{REW}_{\tau})\le O(\sqrt{\tau})=O(\sqrt{T}).
$$
Because the $f_t, c_t$  are sampled i.i.d from $\mathcal{D}$ and $\tau\ge\rho T$,  we get $\mathbb{E}(\mathrm{OPT}_{\tau})\ge\rho \mathbb{E}(\mathrm{OPT}_{T})$. Therefore,
$$
\mathbb{E}(\frac{\rho}{e^{\rho}}\mathrm{OPT}_{T}-\mathrm{REW}_{\tau})\le\mathbb{E}(\frac{1}{e^{\rho}}\mathrm{OPT}_{\tau}-\mathrm{REW}_{\tau})\le O(\sqrt{T}).
$$
\end{proof}
\bibliography{zuo_713}

\end{document}
