% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%The UAI 2022 paper style is based on a custom \textsf{uai2022} class.
%The class file sets the page geometry and visual style.\footnote{%
%    The class uses the packages \textsf{adjustbox}, \textsf{environ}, \textsf{letltxmacro}, \textsf{geometry}, \textsf{footmisc}, \textsf{caption}, \textsf{textcase}, \textsf{titlesec}, \textsf{titling}, \textsf{authblk}, \textsf{enumitem}, \textsf{microtype}, \textsf{lastpage}, and \textsf{kvoptions}.
%}
%The class file also loads basic text fonts.\footnote{%
%    Fonts loaded are \textsf{times} (roman), \textsf{helvet} (sanserif), \textsf{courier} (fixed-width), and \textsf{textcomp} (common symbols).
%}
%\emph{You may not modify the geometry or style in any way, for example, to squeeze out a little bit of extra space.}
%(Also do not use \verb|\vspace| for this.)
%Feel free to use convenience functionality of loaded packages such as \textsf{enumitem}.
%The class enables hyperlinking by loading the \textsf{hyperref} package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{amsmath, amsfonts, amsthm, amssymb, here, dsfont, hyperref}
\usepackage{graphicx,color,subfigure,multirow, here}
\usepackage[utf8]{inputenc} 
\usepackage[T1]{fontenc}
\usepackage{enumitem}
\usepackage{url}
\usepackage{algorithm} 
\usepackage{algorithmic}
\usepackage{xr}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newtheorem{theo}{Theorem}[section]
\newtheorem{definition}[theo]{Definition}
\newtheorem{prop}[theo]{Proposition}
\newtheorem{propri}[theo]{Property}
\newtheorem{coro}[theo]{Corollary}
\newtheorem{lemme}[theo]{Lemma}
\newtheorem{rem}[theo]{Remark}
\newtheorem{ex}[theo]{Example}
\newtheorem{ass}[theo]{Assumption}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\argmax}[1]{\underset{#1}{\operatorname{arg}\!\operatorname{max}}\;}
\newcommand{\argmin}[1]{\underset{#1}{\operatorname{arg}\!\operatorname{min}}\;}
\newcommand{\n}{\noindent }
\newcommand{\w}{\widehat}
\newcommand{\wt}{\widetilde}
\newcommand{\one}{\mathds{1}}
\newcommand{\cA}{\mathcal{A}}
\newcommand{\cB}{\mathcal{B}}
\newcommand{\B}{\mathbb{B}}
\newcommand{\cC}{\mathcal{C}}
\newcommand{\cD}{\mathcal{D}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\cE}{\mathcal{E}}
\newcommand{\cF}{\mathcal{F}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\cH}{\mathcal{H}}
\newcommand{\cP}{\mathcal{P}}
\renewcommand{\P}{\mathbb{P}}
\newcommand{\cR}{\mathcal{R}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\cS}{\mathcal{S}}
\newcommand{\cT}{\mathcal{T}}
\newcommand{\cY}{\mathcal{Y}}
\newcommand{\dd}{\text{{\rm d}}}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\externaldocument{denis_276}















%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Multiclass Classification for Hawkes Processes\\(Supplementary material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:christophe.denis@univ-eiffel.fr}{Christophe Denis}{}}
\author[2]{\href{mailto:charlotte.dion_blanc@sorbonne-universite.fr}{Charlotte Dion-Blanc}{}}
\author[3]{\href{mailto:laure.sansonnet@agroparistech.fr}{Laure Sansonnet}{}}
% Add affiliations after the authors
\affil[1]{%
    LAMA\\
    Université Gustave Eiffel\\
    France
}
\affil[2]{%
    LPSM\\
    Sorbonne Université\\
    France
}
\affil[3]{%
    AgroParisTech, MIA Paris-Saclay\\
    Université Paris-Saclay\\
    France
  }
  
  \begin{document}

\maketitle

In this supplementary material, we first give a technical result in Section~\ref{app:secA}. Then, Section~\ref{app:secB} provides the proofs of the main results.

For the sake of simplicity, we write $\cT$ for $\cT_T$.
We use in the sequel the notation $C$ which represents a positive constant that does not depend on $n$. Each time $C$ is written in some equation, one should understand that there exists a positive constant such that the equation holds. Therefore, the values of $C$ may change from line to line and even change in the same equation. When an index $K$ appears, $C_K$ represents a constant depending on $K$ (and not on $n$).

\section{A technical result}\label{app:secA}

Let us remind the reader that 
$\cE\left(g\right)=\cR(g)-\cR(g^*)$
for any classifier $g \in \mathcal{G}$.

\begin{prop}
\label{prop:excessrisk}
For any classifier $g \in \mathcal{G}$, we have

\begin{equation*}
\cE\left(g\right) =\mathbb{E}\left[\sum_{i, \, k \neq i}^K |\pi^*_i(\cT)-\pi^*_k(\cT)|\one_{\{g^*(\cT)=i, g(\cT)=k\}}\right].    
\end{equation*}

%\begin{eqnarray*}
%&&\cE\left(g\right)\\
%&&= \E\left[\sum_{
%\begin{tablular}
%        i,k;
%        k\neq i
%\end{tablular}
%}^K
%|\pi^*_i(\mathcal{T}_T)-\pi^*_k(\mathcal{T}_T)|\one_{\{g(\cT_T)=k\}}\one_{\{g^*(\cT_T)=i\}}\right].
%\end{eqnarray*}
\end{prop}

\begin{proof}
Let  $g\in \mathcal{G}$,
we have:

\begin{align*}
& \cE\left(g\right) =  \mathbb{E}\left[\one_{\{g(\cT) \neq Y\}} - \one_{\{g^*(\cT) \neq Y\}}\right]\\
&= 
\E\Bigg[\sum_{i=1}^{K}\sum_{j=1}^{K}\sum_{k=1}^{K} \pi^*_i(\cT) \left( \one_{\{g(\cT)\neq i\}}\right. \\
&\hspace{3em} \left.-\one_{\{g^*(\cT)\neq i \}}\right)  \one_{\{g^*(\cT)=j\}}\one_{\{g(\cT)=k\}}\Bigg]\\
 &=\E\left[ \sum_{i=1}^{K}\sum_{k \neq i }  
\pi^*_i(\cT)\one_{\{g(\cT)=k \}}\one_{\{g^*(\cT)=i\}}
\right.\\
&\hspace{3em} \left.-\sum_{k=1}^{K}\sum_{i \neq k } \pi^*_k(\cT)\one_{\{g(\cT)=k\}}\one_{\{g^*(\cT)=i\}}
 \right]\\
 &= \E\left[\sum_{i, \, k \neq i}^K 
(\pi^*_i(\cT)-\pi^*_k(\cT))\one_{\{g(\cT)=k\}}\one_{\{g^*(\cT)=i\}}
 \right].
\end{align*}

We deduce the result of Proposition~\ref{prop:excessrisk} from the following observation on the event $\{g^*(\cT)=i\}$
\begin{equation*} 
\pi^*_i(\cT)-\pi^*_k(\cT) = |\pi^*_i(\cT)-\pi^*_k(\cT)|.
\end{equation*}
\end{proof}





\section{Proofs of main results}\label{app:secB}

%%%%%%%%%%%%%%%%%%%%
\begin{proof}[Proof of Proposition~\ref{prop:prop1}]
%%%%%%%%%%%%%%%%%%%%
We first denote for all $k\in\cY$
$$\Phi^k_t:=\frac{d{\mathbb P}_k|_{{\mathcal F}_t^N}}{d{\mathbb P}_0|_{{\mathcal F}^N_t}},$$
with $\mathcal{F}_T^N:=\sigma\left(\cT_T\right) = \sigma\left(N_t, 0 \leq t \leq T\right)$.
We classically obtain:
$$\log(\Phi^k_t) = - \int_0^t (\lambda^*_k(s)-1) \;\dd s +  \int_0^t\log( \lambda^*_k(s) ) \;\dd N_s,$$
by writing {\it w.r.t.} a Poisson process measure of intensity~1 \citep[see][Chapter~13]{DVJ}.
Thus, for $t\geq 0$, we have the following equation for the mixture measure 
$$
d{\mathbb P}|_{{\mathcal F}^N_t} = \sum_{k=1}^K p_k d{\mathbb P}_k|_{{\mathcal F}^N_t} = \sum_{k=1}^K p_k \Phi^k_t d{\mathbb P}_0|_{{\mathcal F}^N_t}
$$
and then
$$
\frac{d{\mathbb P}_k|_{{\mathcal F}^N_t}}{d{\mathbb P}|_{{\mathcal{F}}^N_t}} = \frac{\Phi_t^k d{\mathbb P}_0|_{{\mathcal F}^N_t}}{\sum_{j=1}^K p_j \Phi^j_t d{\mathbb P}_0|_{{\mathcal F}^N_t}} = \frac{\Phi_t^k}{\sum_{j=1}^K p_j \Phi^j_t}. 
$$
Finally, by using the definition of $F_k^*$, we obtain
$$\pi^*_k\left(\mathcal{T}_T\right) = \frac{p^*_k{\rm e}^{F_k^*}}{\sum_{j=1}^K p^*_j {\rm e}^{F_j^*}},$$
which concludes the proof.
%We first note
%$\sigma\left(\cT_T\right) = \sigma\left(N_t, 0 \leq t \leq T\right) := \mathcal{F}_T^N$.
%Besides let us denote 
%$$\Phi^k_t:=\frac{d{\mathbb P}_k|_{{\mathcal F}_t^N}}{d{\mathbb P}_0|_{{\mathcal F}^N_t}}, k \in \cY$$
%with respect to a Poisson process measure of intensity $1$, it comes
%\begin{eqnarray*}
%\log(\Phi^k_t) &=& - \int_0^t (\lambda^{*(k)}_s -1)ds +  \int_0^t\log( \lambda^{*(k)}_s )dN_s
%\end{eqnarray*}
%see \cite{DVJ} Chapter 13. 
%
%Thus, for $t\geq 0$, we have the following equality for the mixture measure 
%
%$$
%d{\mathbb P}|_{{\mathcal F}^N_t} = \sum_{k=1}^K p_k d{\mathbb P}_k|_{{\mathcal F}^N_t} = \sum_{k=1}^K p_k \Phi^k_t d{\mathbb P}_0|_{{\mathcal F}^N_t}
%$$
%
%
%and finally
%$$
%\frac{d{\mathbb P}_k|_{{\mathcal F}^N_t}}{d{\mathbb P}|_{{\mathcal{F}}^N_t}} = \frac{p_k\Phi_t^k d{\mathbb P}_0|_{{\mathcal F}^N_t}}{\sum_{j=1}^K p_j \Phi^j_t d{\mathbb P}_0|_{{\mathcal F}^N_t}} = \frac{\Phi_t^k}{\sum_{j=1}^K p_j \Phi^j_t}. 
%$$
%Thus, denoting \\
%$\displaystyle F_k^*(\cT)= -\int_0^T \lambda^{(\mu^*, h_k^*)}(s)ds + \sum_{T_i \in \cT} \log(\lambda^{(\mu^*, h_k^*)}(T_i))$ it comes 
%$\displaystyle \pi^*_k(\cT)= \frac{p_k^* e^{F_k^*}}{\sum_{j=1}^K p_j^*F_j^*}$.
\end{proof}






\begin{proof}[Proof of Proposition~\ref{prop:distPi}]




Let $({\bf p},\mu,{\bf h})$ and $({\bf p}^{'}, \mu^{'}, {\bf h}^{'})$ be two tuples. We denote by $\pi$ and $\pi^{'}$ the associated elements in $\Pi$ (see Equation~\eqref{def:Pi}). We have that
\begin{eqnarray}
\label{eq:eqdistPiFirstDecomp}
\left\|\pi(\cT)-\pi^{'}(\cT)\right\|_1 &\leq &\left\|\pi(\cT) - \boldsymbol\pi_{{\bf p},\mu^{'},{\bf h}^{'}}(\cT)\right\|_1 \nonumber\\
&&+ \left\|\boldsymbol\pi_{{\bf p},\mu^{'},{\bf h}^{'}}(\cT)- \pi^{'}(\cT)\right\|_1.
\end{eqnarray}  
Since for any $k$, $j$ and $(x_1, \ldots, x_K)$, 
\begin{equation*}
\left| \dfrac{\partial \phi^{{\bf p}}_k(x_1, \ldots,x_K)}{\partial p_j}\right| \leq  \dfrac{1}{p_0},
\end{equation*}
we deduce by the mean value inequality
\begin{equation*}
\left\|\boldsymbol\pi_{{\bf p},\mu^{'},{\bf h}^{'}}(\cT)- \pi^{'}(\cT)\right\|_1 \leq \dfrac{K}{p_0} \left\|{\bf p}-{\bf p}^{'}\right\|_1.
\end{equation*}
Besides for any $k$, $j$ and ${\bf p}$,
\begin{equation*}
\left| \dfrac{\partial \phi^{{\bf p}}_k(x_1, \ldots,x_K)}{\partial x_j}\right| \leq  1,
\end{equation*}
we also deduce
\begin{align*}
& \left\|\pi(\cT) - \boldsymbol\pi_{{\bf p},\mu^{'},{\bf h}^{'}}(\cT)\right\|_1 \\
& \leq K\sum_{k=1}^K \left| F^{(\mu,h_k)}(\cT)-F^{(\mu^{'},h^{'}_k)}(\cT) \right|.
\end{align*}
Therefore, from Equation~\eqref{eq:eqdistPiFirstDecomp}, we obtain
\begin{multline*}
\hspace{-4mm}\mathbb{E} \left[\left\|\pi(\cT)-\pi^{'}(\cT)\right\|_1\right] \leq
 \dfrac{K}{p_0} \left\|{\bf p}-{\bf p}^{'}\right\|_1 \\
 + K\sum_{k=1}^K \mathbb{E}\left[\left| F^{(\mu,h_k)}(\cT)-F^{(\mu^{'},h^{'}_k)}(\cT) \right|\right].
\end{multline*}
Hence, it remains to bound the second term in the {\it r.h.s.} of the above inequality.
Using Cauchy-Schwarz inequality, for each $k$, we have that
\begin{multline}
\label{eq:eqdecompFdistPi}
\mathbb{E}\left[\left| F^{(\mu,h_k)}(\cT)-F^{(\mu^{'},h^{'}_k)}(\cT) \right|\right] \\
 =  \E\left[ \left| \int_0^T \log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right) \,\dd N_t \right.\right.\\
 \hspace{5mm} \left.\left.- \int_0^T\left(\lambda^{(\mu,h_k)}(t) - \lambda^{(\mu^{'},h^{'}_k)}(t)\right) \, \dd t \right| \right]\\
 \leq   \E\left[ \left(\int_0^T \left|\log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)\right| \,\dd N_t\right)^2 \right]^{1/2}  \\
 + \E\left[\int_0^T\left|\lambda^{(\mu,h_k)}(t) - \lambda^{(\mu^{'},h^{'}_k)}(t)\right| \, \dd t  \right].
% =  \E\left[ \left| \int_0^T \log\left( \frac{\lambda^{k}_t(\mu, {\bf h})}{\lambda^{k}_t(\mu^{'},{\bf h}^{'})} \right) dN_t \right.\right.\\
 %\hspace{5mm} \left.\left.- \int_0^T\left( \lambda_t^{k}(\mu, {\bf h}) - \lambda_t^{k}(\mu^{'},{\bf h}^{'})\right) dt \right| \right]\\
 %\leq   \E\left[ \left( \int_0^T \left| \log\left( \frac{\lambda_t^{k}(\mu,{\bf h})}{\lambda^{k}_t(\mu^{'},{\bf h}^{'})} \right)  \right| dN_t\right)^2 \right]^{1/2}  \\
 %+ \E\left[\int_0^T\left| \lambda_t^{k}(\mu,{\bf h})- \lambda^{k}_t(\mu^{'}, {\bf h}^{'})\right| dt  \right].
\end{multline}
Now, we observe that
\begin{equation*}
\left|\lambda^{(\mu,h_k)}(t) - \lambda^{(\mu^{'},h^{'}_k)}(t)\right| \leq |\mu^{'}-\mu| + \|{\bf h} - {\bf h}^{'}\|_{\infty,T} N_T, 
\end{equation*}
where $N_T= N_{[0,T]}$ denotes the number of jump times of the observed process lying on $[0,T]$.
Therefore we deduce
\begin{eqnarray}
\label{eq:eqDecompPiEq1}
&&\E\left[\int_0^T\left|\lambda^{(\mu,h_k)}(t) - \lambda^{(\mu^{'},h^{'}_k)}(t)\right| \, \dd t  \right]  \nonumber\\ 
&&\leq T\left(|\mu^{'}-\mu| 
+   \|{\bf h} - {\bf h}^{'}\|_{\infty,T} \E\left[N_T\right]\right).
\end{eqnarray}
Now, we bound the first term in the {\it r.h.s.} of Equation~\eqref{eq:eqdecompFdistPi}.
Using that $x \mapsto \log(1+x)$ is Lipschitz we obtain: 
%\begin{eqnarray}
\begin{multline}\label{eq:eqDecompPiEq2}
%\nonumber
\left|\log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)\right| \leq  
 \left|\log\left(\frac{\mu}{\mu^{'}}\right)\right|\\
  + \left|\frac{\lambda^{(\mu,h_k)}(t)}{\mu^{'}} - \frac{\lambda^{(\mu^{'},h^{'}_k)}(t)}{\mu} \right| \\ 
%\nonumber
 \leq  \dfrac{1}{\mu_0}\left|\mu - \mu^{'}\right| + \frac{1}{\mu_0^2}\left|\mu\lambda^{(\mu,h_k)}(t) - \mu^{'}\lambda^{(\mu^{'},h^{'}_k)}(t)\right| \\ 
%\nonumber
 \leq    \dfrac{1}{\mu_0}\left|\mu - \mu^{'}\right| + \frac{1}{\mu_0^2} \left(|\mu -\mu^{'}| \lambda^{(\mu^{'},h^{'}_k)}(t) \right.\\
 \left.+ \mu_1 \left|\lambda^{(\mu,h_k)}(t)-\lambda^{(\mu^{'},h^{'}_k)}(t) \right|\right)\\ 
 \leq    \dfrac{1}{\mu_0}\left|\mu - \mu^{'}\right| + \frac{1}{\mu_0^2} \left(|\mu -\mu^{'}| \lambda^{(\mu^{'},h^{'}_k)}(t) \right.\\
 \left.+ \mu_1 \left(|\mu^{'}-\mu| + \|{\bf h} - {\bf h}^{'}\|_{\infty,T} N_T\right)\right).
%\end{eqnarray}
\end{multline}
Using Doob's decomposition, we get
\begin{eqnarray}\label{eq:eqDecompPiEq3}
&& \hspace{-2mm}\E\left[ \left(\int_0^T \left|\log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)\right| \,\dd N_t\right)^2 \right] =\nonumber\\ 
&&\E\left[  \int_0^T \log^2\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right) \lambda^*_Y(t) \, \dd t \right] \nonumber\\
&&+  \E\left[ \left( \int_0^T\left| \log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right) \right|
 \lambda^*_Y(t) \, \dd t \right)^2 \right].
\end{eqnarray}
Using that $\E\left[\left( \lambda^*_Y(t)\right)^2\right]<\infty$, the first term in the {\it r.h.s.} in Equation~\eqref{eq:eqDecompPiEq3} can be bounded as follows
\begin{multline*}
\E\left[  \int_0^T \log^2\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right) \lambda^*_Y(t) \, \dd t \right] \\
\leq \int_0^T \E \left[ \log^4\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)  \right]^{1/2} \E\left[\left( \lambda^*_Y(t)\right)^2\right]^{1/2} \, \dd t\\
 \leq  C T \sup_{t \in [0,T]} \E \left[ \log^4\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)  \right]^{1/2}.
\end{multline*}
Similarly, we obtain:
\begin{multline*}
 \E\left[ \left( \int_0^T\left| \log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right) \right|
 \lambda^*_Y(t) \, \dd t \right)^2 \right]\\
  \leq  T \E \left[\int_0^T \log^2\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)  \left( \lambda^*_Y(t)\right)^2 \, \dd t \right] \\
 \leq  C T^2 \sup_{t \in [0,T]} \E \left[ \log^4\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)  \right]^{1/2}.
\end{multline*}
%Hence combining the above inequalities with Equation~\eqref{eq:eqDecompPiEq3}, and~\eqref{eq:eqDecompPiEq2},
Then, by Assumption~\ref{ass:h}, we get
\begin{multline*}
 \E\left[ \left(\int_0^T \left|\log\left( \frac{\lambda^{(\mu,h_k)}(t)}{\lambda^{(\mu^{'},h^{'}_k)}(t)} \right)\right| \,\dd N_t\right)^2 \right] \\
\leq  C\left(|\mu-\mu^{'}|^2+ \|{\bf h} - {\bf h}^{'}\|^2_{\infty,T}\right)\\
                       \leq  C \left(2\mu_1|\mu-\mu^{'}|+ \|{\bf h} - {\bf h}^{'}\|^2_{\infty,T}\right),
\end{multline*}
where $C$ is a constant which depends on $\mu_0,\mu_1,{\bf h}^{*}, A_1$, and $T$.
Finally, combining the above equation, Equations~\eqref{eq:eqDecompPiEq1} and~\eqref{eq:eqdecompFdistPi}  yields the desired result.
\end{proof}



\begin{proof}[Proof of Corollary~\ref{coro:excessRiskPi}]

Let $\pi \in \Pi$. We recall that
\begin{equation*}
g_{\pi}(\mathcal{T}) = \argmax{k \in \mathcal{Y}} \pi^k(\mathcal{T})
\end{equation*}
for $h \in \cH$. 
By Proposition~\ref{prop:excessrisk} we then get
\begin{align*}
%\begin{multline*}
0 &\leq  \cE({g}_{\pi}) \\
 &=  \E\left[ \sum_{i, \, k\neq i}^{K}  
|\pi^*_i(\cT)-\pi^*_k(\cT)|\one_{\{{g}_{\pi}(\cT)=k\}}\one_{\{g^*(\cT)=i\}}
 \right]\\
  &\leq  2 \E\left[\max_{k \in \mathcal{Y}} |\pi^k(\cT)-{{\pi}}^*_k(\cT)| \one_{\{{g}_{\pi}(\cT)\neq g^*(\cT)\}}\right]\\
   &\leq 2 \sum_{k=1}^K \E\left[ |\pi^k(\cT)-{{\pi}}^*_k(\cT)| \right].
\end{align*}
%\end{multline*}
Finally, applying Proposition~\ref{prop:distPi}, we obtain the desired result.
\end{proof}


%----------------------------------------------------
\begin{proof}[Proof of Theorem~\ref{thm:riskERM1}]

%We set $\varepsilon= 1/n$ in Assumption~\ref{ass:assOnHNet}.
Let us remind the reader that $\w{\bf p}= (\w{p}_k)_{k=1, \ldots, K}$ with $\w{p}_k= \frac{1}{n}\sum_{i=1}^n \one_{Y_i=k}$.
We consider the following set $\mathcal{A} = \left\{{\bf \w{p}} : \min({\bf \w{p}}) \geq \frac{p_0}{2}\right\}$, where $p_0$ is defined in Assumption~\ref{ass:prob}.

On the one hand, note that on $\mathcal{A}^c$ we have 
\begin{equation*}
|\min({\bf p^*}) -  \min({\bf \w{p}})| \geq \frac{p_0}{2}, 
\end{equation*}
which implies that there exists $k\in\mathcal{Y}$ s.t. $|p^*_k - \w{p}_k| \geq \frac{p_0}{2}$. Thus, by using Hoeffding's inequality we get
\begin{eqnarray}\label{eq:hoeffding1}
\mathbb{P}(\mathcal{A}^c) &\leq & \sum_{k=1}^{K} \mathbb{P}\left(|p^*_k - \w{p}_k| \geq \frac{p_0}{2}\right) \nonumber\\
    & \leq& 2K {\rm e}^{-np_0^2/2}.
\end{eqnarray}

On the other hand, we focus on what happens on the event $\cA$.
%\underline{On $\cA$}
%Let us study what happens on $\cA$. We define
First, we define
\begin{equation}\label{def:ftilde}
\Tilde{\bf f}= {\bf f}_{(\w{\bf p},\Tilde{\mu},\Tilde{\bf h})} = \argmin{{\bf f} \in \w{\cF}} \cR({\bf f}),    
\end{equation}
and then consider the following decomposition
%We focus on the quantity $\cR(\w{\bf f})-\cR({\bf f}^*)$ which can be computed through the decomposition  
\begin{eqnarray*}
\cR(\w{\bf f})-\cR({\bf f}^*)&=&(\cR(\w{\bf f})-\cR(\Tilde{\bf f}))+(\cR(\Tilde{\bf f})-\cR({\bf f}^*))\\
&=:& T_1+T_2.
\end{eqnarray*}
By Equation~\eqref{def:ftilde}, we have that
\begin{align*}
    T_2 & = \cR(\Tilde{{\bf f}})-\cR({\bf f}^*) \\
    %& = %\cR(f_{(\w{p},\Tilde{\mu},\Tilde{\bf h})})-\cR(f_{(p^*,\mu^*,{\bf h}^*)}) \\
    & = \cR({\bf f}_{(\w{\bf p},\Tilde{\mu},\Tilde{\bf h})}) - \cR({\bf f}_{(\w{\bf p},\mu^*,{\bf h}^*)}) \\
    & \hspace{1em} + \cR({\bf f}_{(\w{\bf p},\mu^*,{\bf h}^*)}) - \cR({\bf f}_{({\bf p}^*,\mu^*,{\bf h}^*)}) \\
    & \leq \cR({\bf f}_{(\w{\bf p},\mu^*,{\bf h}^*)}) - \cR({\bf f}_{({\bf p}^*,\mu^*,{\bf h}^*)}).
\end{align*}
Therefore, on  $\mathcal{A}$, we deduce  from the mean value inequality that
\begin{equation}
\label{eq:meanValueWithPk}
T_2 \leq C_K \sum_{k=1}^K |\w{p}_k-p_k^*|^2,
\end{equation}
where $C_K$ is a constant depending on $K$.
For establishing an upper bound for $T_1$, we first recall the definition~\eqref{eq:eqErm} of the empirical risk minimizer over $\w{\cF}$:
\begin{equation*}
\w{{\bf f}} \in \argmin{{\bf f} \in \w{\cF}} \w{\cR}({\bf f}),
\end{equation*}
with
\begin{equation*}
\w{\cR}({\bf f}) = \dfrac{1}{n} \sum_{i=1}^n \sum_{k = 1}^K \left(Z_k^i-{\bf f}^k(\cT^i)\right)^2.  
\end{equation*}
Besides, let us introduce the set of parameters
$$\cS = \{({\bf p},\mu,{\bf h}): \; {\bf p} \in \cP_{p_0/2}, \; \mu \in [\mu_0, \mu_1], \; {\bf h} \in \cH_A^K\}.$$
Then, on $\mathcal{A}$, we have by definition~\eqref{def:ftilde} of $\Tilde{\bf f}$,
\begin{eqnarray}
T_1&=&\cR(\w{\bf f})-\cR(\Tilde{\bf f})\nonumber\\
&=& \cR(\w{\bf f})-\w{\cR}(\w{\bf f})+\w{\cR}(\w{\bf f})-\cR(\Tilde{\bf f})\nonumber\\
&\leq &\cR(\w{\bf f})-\w{\cR}(\w{\bf f})+\w{\cR}(\Tilde{\bf f})-\cR(\Tilde{\bf f})\nonumber\\
& \leq & 2 \sup_{({\bf p},\mu, {\bf h})\in \cS } |\cR({\bf f}_{({\bf p}, \mu, {\bf h})}) - \w{\cR}({\bf f}_{({\bf p}, \mu, {\bf h})})|. \label{majT1}
\end{eqnarray}
By combining~\eqref{eq:meanValueWithPk} and~\eqref{majT1}, we obtain
\begin{multline*}
    \E[ \cR(\w{\bf f})- \cR({\bf f}^*)] \\
    \leq 2\E\left[\sup_{({\bf p},\mu, {\bf h})\in \cS } |\cR({\bf f}_{({\bf p}, \mu, {\bf h})}) - \w{\cR}({\bf f}_{({\bf p}, \mu, {\bf h})})| \one_{\cA}\right]\\
+ \E \left[C_K \sum_{k=1}^K |\w{p}_k -p_k^*|^2 \one_{\cA} \right]\\
+ \E \left[\left( \cR(\w{\bf f})-\cR({\bf f}^*)\right) \one_{\cA^c} \right].
\end{multline*}
Since for $k \in \cY$, $\E[|\w{p}_k-p^*_k|^2]\leq C/n$ with $C$ an absolute constant and $\w{{\bf f}}$ and ${\bf f}^*$ are bounded, by using Equation~\eqref{eq:hoeffding1}, we obtain:
%
\begin{multline}
\label{eq:bound1}
\E[ \cR(\w{\bf f})- \cR({\bf f}^*)]\\ \leq 2 \E\left[ \sup_{({\bf p},\mu, {\bf h})\in \cS } |\cR({\bf f}_{({\bf p}, \mu, {\bf h})}) - \w{\cR}({\bf f}_{({\bf p}, \mu, {\bf h})})|\right] \\
 + C_K \left(\frac{1}{n} + \exp\left(-\frac{np_0^2}{2}\right)\right). 
\end{multline}
It remains to control the first term in the right hand side of the above inequality. 
%Let us introduce the least squares function 
%$$l_{\bf f}(Z, \cT):= \sum_{k = 1}^K (Z_k-f^k(\cT))^2,$$ 
%then we can rewrite
%\begin{multline*}
%\cR({\bf f})-\cR({\bf f}^*) = \mathbb{E}\left[l_{\bf f}(Z, \cT)-l_{{\bf f}^*}(Z, \cT)\right] \\= \sum_{k=1}^K \E\left[\left(f^k(\cT) - f^{*k}(\cT\right))^2\right]
%\end{multline*}
%
%
By Assumption~\ref{ass:assOnHNet} with $\varepsilon= 1/n$ and  since ${\bf p} \in \cP_{p_0/2}$, and $\mu \in [\mu_0, \mu_1]$, there exists a finite set
$\cS_{n}\subset \cS$ such that for each 
$({\bf p},\mu,{\bf h}) \in \cS$, there exists $({\bf p}_{n},\mu_{n},{\bf h}_{n}) \in \cS_n$
satisfying
\begin{equation*}
\|{\bf p}_{n}-{\bf p}\|_1 \leq \frac{C_K}{n}, \;\; |\mu_n-\mu| \leq \frac{1}{n}, \;\; \|{\bf h}_n - {\bf h}\|_{\infty,T} \leq \frac{1}{n}.
\end{equation*}
Moreover, we have $\log({\rm card}(\cS_{n})) \leq C_K \log(n^{d})$.
For $({\bf p},\mu,{\bf h}) \in \cS$,
let us denote
${\bf f}= {\bf f}_{({\bf p},\mu,{\bf h})}$ and ${\bf f}_n= {\bf f}_{({\bf p}_{n},\mu_{n},{\bf h}_{n})}$ the corresponding element of $\cS_n$.
Then, we have
\begin{align*}
|\cR({\bf f})-\w{\cR}({\bf f})| &\leq \left|\cR({\bf f})-{\cR}({\bf f}_n)\right| \\
&+ |\cR({\bf f}_{n})-\w{\cR}({\bf f}_n)|
+ \left|\w{\cR}({\bf f}_{n}) -\w{\cR}({\bf f}) \right|.
\end{align*}
Moreover, since ${\bf f}$ and ${\bf f}_n$ are bounded, we deduce, denoting $\pi_n:=\boldsymbol{\pi}_{{\bf p}_{n},\mu_{n},{\bf h}_{n}}$, that
%for $n>1$
\begin{multline*}
\E\left[\left|\cR({\bf f})-{\cR}({\bf f}_n)\right|\right] \leq \E\left[\left\|\pi(\cT) - \pi_n(\cT)\right\|_1\right] \leq  \frac{C}{n},
\end{multline*}
where the last inequality is obtained with the same arguments as in the proof of Proposition~\ref{prop:distPi}.
In the same way, we also get
\begin{equation*}
\E\left[\left|\w{\cR}({\bf f})-\w{\cR}({\bf f}_n)\right|\right] \leq \frac{C}{n}.
\end{equation*}
Finally, from the above inequalities, we obtain that
\begin{align*}
& \E\left[\sup_{ \cS}\left|\cR({\bf f})-\w{\cR}({\bf f})\right|\right]  \\
& \leq 
\frac{2C}{n} + \E\left[\max_{\cS_{n}}\left|\cR({\bf f})-\w{\cR}({\bf f})\right|\right].
\end{align*}
Moreover, by Hoeffding's inequality, we get for $t \geq 0$,
\begin{align*}
& \P\left(\max_{\cS_n} | \w{\cR}( {\bf f})- \cR({\bf f})| \geq t\right) \\
& \leq \min(1,2\,{\rm card}(\cS_{n}) \exp(-2n t^2)).
\end{align*}
Integrating the previous equation leads to
\begin{multline*}
\E\left[\max_{\cS_n} | \w{\cR}( {{\bf f}})- \cR({\bf f})| \right]
\\
\leq   \int_0^\infty \min(1,\exp(\log(2\,{\rm card}(\cS_{n})) -2nt^2)) \;\dd t \\
 \leq   \int_0^\infty \exp\left(-(2nt^2-\log(2\,{\rm card}(\cS_{n})) )_+ \right)  \;\dd t \\
\leq  \sqrt{\frac{\log(2\,{\rm card}(\mathcal{S}_n))}{2n }}+ \frac{\sqrt{\pi}}{2 \sqrt{2n}}.
\end{multline*}


%using $\int_0^\infty e^{-u^2}du = \sqrt{\pi}/2$.
Finally, since there are at least two elements in $\mathcal{S}_n$, combining the above inequality and Equation~\eqref{eq:bound1} yields
\begin{equation*}
\E[\cR(\w{{\bf f}})- \cR({\bf f}^*)] \leq 
C\sqrt{\frac{\log(2\,{\rm card}(\mathcal{S}_n))}{2n }} + \frac{C}{n}, 
\end{equation*}
which concludes the proof.
\end{proof}





\begin{proof}[Proof of Theorem~\ref{thm:riskERM2}]
%Let us consider $\mathcal{H}_\varepsilon$ an $\varepsilon-$net for $\|\|_{\infty,T}$. Then, there %exists $h_\varepsilon \in \mathcal{H}_\varepsilon$ such that
%$\|\w{h}-h_\varepsilon\|_{\infty,T} \leq \varepsilon$.
%Then, 
%$$D_{\w{h}} \leq (D_{\w{h}}-{D}_{{h}_\varepsilon})+ ({D}_{{h}_\varepsilon}-2 %\w{D}_{{h}_\varepsilon})+ 2( \w{D}_{{h}_\varepsilon}-\w{D}_{\w{h}}) = T_1+ T_2+ T_3.$$
%\paragraph{Study of $T_1$}\text{}\\
%With $\varepsilon= 1/n$. \\

Let us denote 
\begin{equation*}
\Delta_n :=  \sum_{k=1}^K (\w{p}_k-p_k^*)^2,
\end{equation*}
where based on $\cD_{n_1}:=\cD_n^1$, $\w{p}_k= \frac{1}{n_1}\sum_{i=1}^{n_1} \one_{Y_i=k}$.  
Note that $\Delta_n$ %is a random variable which only depends on $\cD_{n_1}$ and
is independent from $\cD_{n_2}:=\cD_n^2$. 
Recall that $n$ is assumed to be even and $n_1 = n_2 = n/2$.

Let us work again on the set
 $\mathcal{A} = \left\{{\bf \w{p}} :\; \min({\bf \w{p}}) \geq \frac{p_0}{2}\right\}$.
As in the proof of Theorem~\ref{thm:riskERM1}, we can write
$$\cR(\w{\bf f})-\cR({\bf f}^*) \leq \cR(\w{\bf f})-\cR(\Tilde{\bf f})+\cR(\Tilde{\bf f})-\cR({\bf f}^*),$$
and from Equation~\eqref{eq:meanValueWithPk}, the second term in the right hand side of the above inequality is bounded by $C_K \Delta_n$. 

Let us denote 
$$D_{{\bf f}}:=\cR({\bf f})-\cR(\Tilde{\bf f})$$
and 
$$\w{D}_{{\bf f}}:=\w{\cR}({\bf f})-\w{\cR}(\Tilde{\bf f}).$$
Furthermore, let us introduce 
\begin{equation*}
\tilde{\cS} = \{(\mu, {\bf h}): \; \mu \in [\mu_0, \mu_1], \; {\bf h} \in \cH^K_A\}.
\end{equation*}
By Assumption~\ref{ass:assOnHNet}, there exists a subset $\tilde{\cS}_n \subset \tilde{\cS}$ with $\log({\rm card}(\tilde{\cS}_{n})) \leq C \log(n^{d})$, such that for each 
$(\mu,{\bf h}) \in \tilde{\cS}$, there exists $(\mu_{n},{\bf h}_{n}) \in \tilde{\cS}_n$
satisfying
\begin{equation*}
|\mu_n-\mu| \leq \frac{1}{n} \quad {\rm and} \quad \|{\bf h}_n - {\bf h}\|_{\infty,T} \leq \frac{1}{n}.
\end{equation*}
For $(\mu,{\bf h}) \in \tilde{\cS}$,
let us denote
${\bf f}= {\bf f}_{(\w{{\bf p}},\mu,{\bf h})}$ and ${\bf f}_n= {\bf f}_{(\w{{\bf p}},\mu_{n},{\bf h}_{n})}$ the associated element of $\tilde{\cS_n}$.
Then, the following decomposition holds
\begin{eqnarray*}
D_{\w{\bf f}} &\leq &  D_{\w{\bf f}}- 2 \w{D}_{\w{\bf f}}\\
&=& (D_{\w{\bf f}}-D_{{\bf f}_n})+ (2\w{D}_{{\bf f}_n}-2\w{D}_{\w{\bf f}})\\
&&+ (D_{{\bf f}_n}-2\w{D}_{{\bf f}_n})\\
&=:& T_1+ T_2 +T_3. 
\end{eqnarray*}
As in the proof of Theorem~\ref{thm:riskERM1} and using the same arguments as in the proof of Proposition~\ref{prop:distPi}, we have
\begin{equation*}
\E\left[T_i\right] \leq \frac{C}{n}, \quad \mbox{for } \, i=1,2.
\end{equation*}
Besides, 
$$T_3 \leq \max_{ \tilde{\cS_n} } (D_{{\bf f}}-2\w{D}_{{\bf f}}).$$
Therefore, gathering the previous inequalities,
we deduce that 
\begin{multline}
\label{eq:bound2}
\E[ \cR(\w{\bf f})- \cR({\bf f}^*)]\\ \leq  \E\left[\max_{ \tilde{\cS_n} } (D_{{\bf f}}-2\w{D}_{{\bf f}})\one_{\cA}\right] \\
 + C_K \left(\frac{1}{n} + \exp\left(-\frac{np_0^2}{4}\right)\right). 
\end{multline}
Therefore to finish the proof it remains to control the first term in the right hand side of Inequality~\eqref{eq:bound2}.
For $u \geq 0$, on $\cA$ and conditionally on $\cD_{n_1}$, it holds that, 
\begin{multline}
\label{eq:eqboundExpInt}
\E \left[\max_{\tilde{\cS}_n} (D_{{\bf f}}-2\w{D}_{{\bf f}})\right]\\
 \leq  u+ \int_u^{\infty} \P\left( \max_{\tilde{\cS}_n} (D_{{\bf f}}-2\w{D}_{{\bf f}}) \geq t \right) \dd t.
\end{multline}
Let us introduce the least squares function 
$$l_{\bf f}(Z, \cT):= \sum_{k = 1}^K (Z_k-{\bf{f}}^k(\cT))^2.$$ 
Since for each $(\mu, {\bf h}) \in \tilde{\cS}$, ${\bf f}_{(\w{{\bf p}},\mu,{\bf h})}$ are uniformly bounded by $1$, we get from Bernstein's inequality, conditionally on $\cD_{n_1}$, for $t \geq 0$
\begin{multline}
\label{eq:eqBernstein}
\P\left( D_{{\bf f}}-2\w{D}_{{\bf f}} \geq t \right) =  \P\left( 2(D_{{\bf f}}-\w{D}_{{\bf f}}) \geq t + D_{{\bf f}} \right) \\
\leq \exp \left( \frac{-n(t+D_{\bf f})^2/8}{ B_{{\bf f}} + (t+D_{\bf f})4K/3} \right),
\end{multline}
with 
\begin{equation*}
B_{{\bf f}} :=  \E\left[\left(l_{\bf f}(Z, \cT)-l_{\tilde{{\bf f}}}(Z, \cT)\right)^2\right].
\end{equation*}
Besides, conditionally on $\cD_{n_1}$, 
we have
\begin{equation*}
\left| l_{\bf f}(Z, \cT)-l_{{\bf f}^*}(Z, \cT) \right| \leq C\sum_{k = 1}^K \left|{\bf f}^k(\cT)-{\bf f}^{*k}(\cT)\right|.
\end{equation*}
Therefore, conditionally on $\cD_{n_1}$, we deduce from the Cauchy--Schwarz inequality
\begin{multline*}
\E\left[\left(l_{\bf f}(Z, \cT)-l_{{\bf f}^*}(Z, \cT)\right)^2\right]\\ \leq C_K 
\sum_{k=1}^K \mathbb{E}\left[({\bf f}^k(\cT)-{\bf f}^{*k}(\cT))^2\right] \\
= C_K \left(\cR({\bf f})-\cR({\bf f}^*) \right).
\end{multline*}
Thus,  writing 
\begin{multline*}
B_{{\bf f}} \leq  2 \E\left[ \left(l_{\bf f}(Z, \cT)-l_{{\bf f}^*}(Z, \cT)\right)^2 \right] \\+ 2\E\left[ \left(l_{\Tilde{\bf f}}(Z, \cT)-l_{{\bf f}^*}(Z, \cT)\right)^2\right],
\end{multline*}
we deduce
$$B_{{\bf f}}  \leq C_K\left(\cR({\bf f})-\cR({\bf f}^*)+\cR(\Tilde{\bf f})-\cR({\bf f}^*) \right).$$
Then,  as $\cR({\bf f})-\cR({\bf f}^*) = \cR({\bf f})-\cR(\Tilde{\bf f})+
\cR(\Tilde{\bf f})-\cR({\bf f}^*)$,  conditionally on $\cD_{n_1}$ and on the event $\cA$, we deduce from the above inequality and Equation~\eqref{eq:meanValueWithPk} that
%\begin{multline*}
$$
B_{{\bf f}} 
%\leq  
%C_K\left(D_{{\bf f}}  + \cR(\Tilde{\bf f})-\cR({\bf f}^*) \right)
 \leq C_K\left(D_{{\bf f}}  + \Delta_n\right).
%\end{multline*}
$$
%Thus,  
%$$B_{{\bf f}}  \leq C_K\left(\cR({\bf f})-\cR(\Tilde{\bf f})+\cR(\Tilde{\bf f})-\cR({\bf f}^*) \right).$$
%Therefore, conditionally on $\cD_{n_1}$ and on the event $\cA$, we deduce from the above inequality and Equation~\eqref{eq:meanValueWithPk} that
%%\begin{multline*}
%$$
%B_{{\bf f}} 
%%\leq  
%%C_K\left(D_{{\bf f}}  + \cR(\Tilde{\bf f})-\cR({\bf f}^*) \right)
% \leq C_K\left(D_{{\bf f}}  + \Delta_n\right).
%%\end{multline*}
%$$
Hence, 
%conditional on $\cD_{n_1}$, from 
from Inequality~\eqref{eq:eqBernstein}, we get for $t \geq \Delta_n$,
\begin{equation*}
\P\left( D_{{\bf f}}-2\w{D}_{{\bf f}} \geq t \right) \leq  \exp\left( -C_K n t\right),
\end{equation*}
which leads to  
\begin{equation*}
\P\left( \max_{\tilde{\cS_n}} (D_{{\bf f}}-2\w{D}_{{\bf f}}) \geq t  \right) \leq \text{card}(\tilde{\cS_n}) \exp\left(-C_K n t\right).
\end{equation*}
In view of Equation~\eqref{eq:eqboundExpInt}, we then obtain that, conditionally
on $\cD_{n_1}$,
\begin{multline*}
\E \left[\max_{\tilde{\cS}_n} (D_{{\bf f}}-2\w{D}_{{\bf f}}) \one_{\cA}\right]
\leq \max\left(\Delta_n, \frac{C_K \log({\rm card}(\tilde{\cS}_n))}{n}\right)\\
+\int_{C_K \log({\rm card}(\tilde{\cS}_n))/n}^{+\infty} \exp(-C_Knt) \, \dd t.
\end{multline*}
Finally, integrating the above inequality {\it w.r.t.} $\cD_{n_1}$ yields
\begin{equation*}
\E \left[\max_{\tilde{\cS}_n} (D_{{\bf f}}-2\w{D}_{{\bf f}}) \one_{\cA}\right]    \leq \frac{C_K \log({\rm card}(\tilde{\cS}_n))}{n}.
\end{equation*}
Hence, this inequality combined with Equation~\eqref{eq:bound2} gives the desired result.



\end{proof}




%%%%%%%%%%%%%%%%%%%%
\bibliography{BIB_UAI}
%%%%%%%%%%%%%%%%%%%%
%

\end{document}
