%\documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{subcaption}
\usepackage{multirow}

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 
\externaldocument{uai2023-template}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{corollary}{Corollary}[section]
\usepackage{tikz}
% \circled{x}: typesets x inside a drawn circle (1pt inner padding); the
% picture's baseline is taken from the node's text baseline (char.base).
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
		\node[shape=circle,draw,inner sep=1pt] (char) {#1};}}
%%%%% NEW MATH DEFINITIONS %%%%%

\usepackage{amsmath,amsfonts,bm}

% Mark sections of captions for referring to divisions of figures
\newcommand{\figleft}{{\em (Left)}}
\newcommand{\figcenter}{{\em (Center)}}
\newcommand{\figright}{{\em (Right)}}
\newcommand{\figtop}{{\em (Top)}}
\newcommand{\figbottom}{{\em (Bottom)}}
\newcommand{\captiona}{{\em (a)}}
\newcommand{\captionb}{{\em (b)}}
\newcommand{\captionc}{{\em (c)}}
\newcommand{\captiond}{{\em (d)}}

% Highlight a newly defined term
\newcommand{\newterm}[1]{{\bf #1}}


% Figure reference, lower-case.
\def\figref#1{figure~\ref{#1}}
% Figure reference, capital. For start of sentence
\def\Figref#1{Figure~\ref{#1}}
\def\twofigref#1#2{figures \ref{#1} and \ref{#2}}
\def\quadfigref#1#2#3#4{figures \ref{#1}, \ref{#2}, \ref{#3} and \ref{#4}}
% Section reference, lower-case.
\def\secref#1{section~\ref{#1}}
% Section reference, capital.
\def\Secref#1{Section~\ref{#1}}
% Reference to two sections.
\def\twosecrefs#1#2{sections \ref{#1} and \ref{#2}}
% Reference to three sections.
\def\secrefs#1#2#3{sections \ref{#1}, \ref{#2} and \ref{#3}}
% Reference to an equation, lower-case.
\def\eqref#1{equation~\ref{#1}}
% Reference to an equation, upper case
\def\Eqref#1{Equation~\ref{#1}}
% A raw reference to an equation---avoid using if possible
\def\plaineqref#1{\ref{#1}}
% Reference to a chapter, lower-case.
\def\chapref#1{chapter~\ref{#1}}
% Reference to a chapter, upper case. For start of sentence.
\def\Chapref#1{Chapter~\ref{#1}}
% Reference to a range of chapters, printed as "chapters <first>--<last>".
% The tie separates the word from the first number and keeps them on one line.
\def\rangechapref#1#2{chapters~\ref{#1}--\ref{#2}}
% Reference to an algorithm, lower-case.
\def\algref#1{algorithm~\ref{#1}}
% Reference to an algorithm, upper case.
\def\Algref#1{Algorithm~\ref{#1}}
\def\twoalgref#1#2{algorithms \ref{#1} and \ref{#2}}
\def\Twoalgref#1#2{Algorithms \ref{#1} and \ref{#2}}
% Reference to a part, lower case
\def\partref#1{part~\ref{#1}}
% Reference to a part, upper case
\def\Partref#1{Part~\ref{#1}}
\def\twopartref#1#2{parts \ref{#1} and \ref{#2}}

\def\ceil#1{\lceil #1 \rceil}
\def\floor#1{\lfloor #1 \rfloor}
\def\1{\bm{1}}
\newcommand{\train}{\mathcal{D}}
\newcommand{\valid}{\mathcal{D_{\mathrm{valid}}}}
\newcommand{\test}{\mathcal{D_{\mathrm{test}}}}

\def\eps{{\epsilon}}


% Random variables
\def\reta{{\textnormal{$\eta$}}}
\def\ra{{\textnormal{a}}}
\def\rb{{\textnormal{b}}}
\def\rc{{\textnormal{c}}}
\def\rd{{\textnormal{d}}}
\def\re{{\textnormal{e}}}
\def\rf{{\textnormal{f}}}
\def\rg{{\textnormal{g}}}
\def\rh{{\textnormal{h}}}
\def\ri{{\textnormal{i}}}
\def\rj{{\textnormal{j}}}
\def\rk{{\textnormal{k}}}
\def\rl{{\textnormal{l}}}
% rm is already a command, just don't name any random variables m
\def\rn{{\textnormal{n}}}
\def\ro{{\textnormal{o}}}
\def\rp{{\textnormal{p}}}
\def\rq{{\textnormal{q}}}
\def\rr{{\textnormal{r}}}
\def\rs{{\textnormal{s}}}
\def\rt{{\textnormal{t}}}
\def\ru{{\textnormal{u}}}
\def\rv{{\textnormal{v}}}
\def\rw{{\textnormal{w}}}
\def\rx{{\textnormal{x}}}
\def\ry{{\textnormal{y}}}
\def\rz{{\textnormal{z}}}

% Random vectors
\def\rvepsilon{{\mathbf{\epsilon}}}
\def\rvtheta{{\mathbf{\theta}}}
\def\rva{{\mathbf{a}}}
\def\rvb{{\mathbf{b}}}
\def\rvc{{\mathbf{c}}}
\def\rvd{{\mathbf{d}}}
\def\rve{{\mathbf{e}}}
\def\rvf{{\mathbf{f}}}
\def\rvg{{\mathbf{g}}}
\def\rvh{{\mathbf{h}}}
\def\rvi{{\mathbf{i}}} % random vector i (\rvu itself is defined with the letter u further down)
\def\rvj{{\mathbf{j}}}
\def\rvk{{\mathbf{k}}}
\def\rvl{{\mathbf{l}}}
\def\rvm{{\mathbf{m}}}
\def\rvn{{\mathbf{n}}}
\def\rvo{{\mathbf{o}}}
\def\rvp{{\mathbf{p}}}
\def\rvq{{\mathbf{q}}}
\def\rvr{{\mathbf{r}}}
\def\rvs{{\mathbf{s}}}
\def\rvt{{\mathbf{t}}}
\def\rvu{{\mathbf{u}}}
\def\rvv{{\mathbf{v}}}
\def\rvw{{\mathbf{w}}}
\def\rvx{{\mathbf{x}}}
\def\rvy{{\mathbf{y}}}
\def\rvz{{\mathbf{z}}}

% Elements of random vectors
\def\erva{{\textnormal{a}}}
\def\ervb{{\textnormal{b}}}
\def\ervc{{\textnormal{c}}}
\def\ervd{{\textnormal{d}}}
\def\erve{{\textnormal{e}}}
\def\ervf{{\textnormal{f}}}
\def\ervg{{\textnormal{g}}}
\def\ervh{{\textnormal{h}}}
\def\ervi{{\textnormal{i}}}
\def\ervj{{\textnormal{j}}}
\def\ervk{{\textnormal{k}}}
\def\ervl{{\textnormal{l}}}
\def\ervm{{\textnormal{m}}}
\def\ervn{{\textnormal{n}}}
\def\ervo{{\textnormal{o}}}
\def\ervp{{\textnormal{p}}}
\def\ervq{{\textnormal{q}}}
\def\ervr{{\textnormal{r}}}
\def\ervs{{\textnormal{s}}}
\def\ervt{{\textnormal{t}}}
\def\ervu{{\textnormal{u}}}
\def\ervv{{\textnormal{v}}}
\def\ervw{{\textnormal{w}}}
\def\ervx{{\textnormal{x}}}
\def\ervy{{\textnormal{y}}}
\def\ervz{{\textnormal{z}}}

% Random matrices
\def\rmA{{\mathbf{A}}}
\def\rmB{{\mathbf{B}}}
\def\rmC{{\mathbf{C}}}
\def\rmD{{\mathbf{D}}}
\def\rmE{{\mathbf{E}}}
\def\rmF{{\mathbf{F}}}
\def\rmG{{\mathbf{G}}}
\def\rmH{{\mathbf{H}}}
\def\rmI{{\mathbf{I}}}
\def\rmJ{{\mathbf{J}}}
\def\rmK{{\mathbf{K}}}
\def\rmL{{\mathbf{L}}}
\def\rmM{{\mathbf{M}}}
\def\rmN{{\mathbf{N}}}
\def\rmO{{\mathbf{O}}}
\def\rmP{{\mathbf{P}}}
\def\rmQ{{\mathbf{Q}}}
\def\rmR{{\mathbf{R}}}
\def\rmS{{\mathbf{S}}}
\def\rmT{{\mathbf{T}}}
\def\rmU{{\mathbf{U}}}
\def\rmV{{\mathbf{V}}}
\def\rmW{{\mathbf{W}}}
\def\rmX{{\mathbf{X}}}
\def\rmY{{\mathbf{Y}}}
\def\rmZ{{\mathbf{Z}}}

% Elements of random matrices
\def\ermA{{\textnormal{A}}}
\def\ermB{{\textnormal{B}}}
\def\ermC{{\textnormal{C}}}
\def\ermD{{\textnormal{D}}}
\def\ermE{{\textnormal{E}}}
\def\ermF{{\textnormal{F}}}
\def\ermG{{\textnormal{G}}}
\def\ermH{{\textnormal{H}}}
\def\ermI{{\textnormal{I}}}
\def\ermJ{{\textnormal{J}}}
\def\ermK{{\textnormal{K}}}
\def\ermL{{\textnormal{L}}}
\def\ermM{{\textnormal{M}}}
\def\ermN{{\textnormal{N}}}
\def\ermO{{\textnormal{O}}}
\def\ermP{{\textnormal{P}}}
\def\ermQ{{\textnormal{Q}}}
\def\ermR{{\textnormal{R}}}
\def\ermS{{\textnormal{S}}}
\def\ermT{{\textnormal{T}}}
\def\ermU{{\textnormal{U}}}
\def\ermV{{\textnormal{V}}}
\def\ermW{{\textnormal{W}}}
\def\ermX{{\textnormal{X}}}
\def\ermY{{\textnormal{Y}}}
\def\ermZ{{\textnormal{Z}}}

% Vectors
\def\vzero{{\bm{0}}}
\def\vone{{\bm{1}}}
\def\vmu{{\bm{\mu}}}
\def\vtheta{{\bm{\theta}}}
\def\va{{\bm{a}}}
\def\vb{{\bm{b}}}
\def\vc{{\bm{c}}}
\def\vd{{\bm{d}}}
\def\ve{{\bm{e}}}
\def\vf{{\bm{f}}}
\def\vg{{\bm{g}}}
\def\vh{{\bm{h}}}
\def\vi{{\bm{i}}}
\def\vj{{\bm{j}}}
\def\vk{{\bm{k}}}
\def\vl{{\bm{l}}}
\def\vm{{\bm{m}}}
\def\vn{{\bm{n}}}
\def\vo{{\bm{o}}}
\def\vp{{\bm{p}}}
\def\vq{{\bm{q}}}
\def\vr{{\bm{r}}}
\def\vs{{\bm{s}}}
\def\vt{{\bm{t}}}
\def\vu{{\bm{u}}}
\def\vv{{\bm{v}}}
\def\vw{{\bm{w}}}
\def\vx{{\bm{x}}}
\def\vy{{\bm{y}}}
\def\vz{{\bm{z}}}

% Elements of vectors
\def\evalpha{{\alpha}}
\def\evbeta{{\beta}}
\def\evepsilon{{\epsilon}}
\def\evlambda{{\lambda}}
\def\evomega{{\omega}}
\def\evmu{{\mu}}
\def\evpsi{{\psi}}
\def\evsigma{{\sigma}}
\def\evtheta{{\theta}}
\def\eva{{a}}
\def\evb{{b}}
\def\evc{{c}}
\def\evd{{d}}
\def\eve{{e}}
\def\evf{{f}}
\def\evg{{g}}
\def\evh{{h}}
\def\evi{{i}}
\def\evj{{j}}
\def\evk{{k}}
\def\evl{{l}}
\def\evm{{m}}
\def\evn{{n}}
\def\evo{{o}}
\def\evp{{p}}
\def\evq{{q}}
\def\evr{{r}}
\def\evs{{s}}
\def\evt{{t}}
\def\evu{{u}}
\def\evv{{v}}
\def\evw{{w}}
\def\evx{{x}}
\def\evy{{y}}
\def\evz{{z}}

% Matrix
\def\mA{{\bm{A}}}
\def\mB{{\bm{B}}}
\def\mC{{\bm{C}}}
\def\mD{{\bm{D}}}
\def\mE{{\bm{E}}}
\def\mF{{\bm{F}}}
\def\mG{{\bm{G}}}
\def\mH{{\bm{H}}}
\def\mI{{\bm{I}}}
\def\mJ{{\bm{J}}}
\def\mK{{\bm{K}}}
\def\mL{{\bm{L}}}
\def\mM{{\bm{M}}}
\def\mN{{\bm{N}}}
\def\mO{{\bm{O}}}
\def\mP{{\bm{P}}}
\def\mQ{{\bm{Q}}}
\def\mR{{\bm{R}}}
\def\mS{{\bm{S}}}
\def\mT{{\bm{T}}}
\def\mU{{\bm{U}}}
\def\mV{{\bm{V}}}
\def\mW{{\bm{W}}}
\def\mX{{\bm{X}}}
\def\mY{{\bm{Y}}}
\def\mZ{{\bm{Z}}}
\def\mBeta{{\bm{\beta}}}
\def\mPhi{{\bm{\Phi}}}
\def\mLambda{{\bm{\Lambda}}}
\def\mSigma{{\bm{\Sigma}}}

% Shorthands for the angle-bracket delimiters \langle and \rangle.
% NOTE: \ra was already defined earlier in this file as the random variable a
% (\textnormal{a}); this later \def silently replaces that meaning.
\def\la{\langle}
\def\ra{\rangle}

% Tensor
\DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl}
\SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n}
\newcommand{\tens}[1]{\bm{\mathsfit{#1}}}
\def\tA{{\tens{A}}}
\def\tB{{\tens{B}}}
\def\tC{{\tens{C}}}
\def\tD{{\tens{D}}}
\def\tE{{\tens{E}}}
\def\tF{{\tens{F}}}
\def\tG{{\tens{G}}}
\def\tH{{\tens{H}}}
\def\tI{{\tens{I}}}
\def\tJ{{\tens{J}}}
\def\tK{{\tens{K}}}
\def\tL{{\tens{L}}}
\def\tM{{\tens{M}}}
\def\tN{{\tens{N}}}
\def\tO{{\tens{O}}}
\def\tP{{\tens{P}}}
\def\tQ{{\tens{Q}}}
\def\tR{{\tens{R}}}
\def\tS{{\tens{S}}}
\def\tT{{\tens{T}}}
\def\tU{{\tens{U}}}
\def\tV{{\tens{V}}}
\def\tW{{\tens{W}}}
\def\tX{{\tens{X}}}
\def\tY{{\tens{Y}}}
\def\tZ{{\tens{Z}}}


% Graph
\def\gA{{\mathcal{A}}}
\def\gB{{\mathcal{B}}}
\def\gC{{\mathcal{C}}}
\def\gD{{\mathcal{D}}}
\def\gE{{\mathcal{E}}}
\def\gF{{\mathcal{F}}}
\def\gG{{\mathcal{G}}}
\def\gH{{\mathcal{H}}}
\def\gI{{\mathcal{I}}}
\def\gJ{{\mathcal{J}}}
\def\gK{{\mathcal{K}}}
\def\gL{{\mathcal{L}}}
\def\gM{{\mathcal{M}}}
\def\gN{{\mathcal{N}}}
\def\gO{{\mathcal{O}}}
\def\gP{{\mathcal{P}}}
\def\gQ{{\mathcal{Q}}}
\def\gR{{\mathcal{R}}}
\def\gS{{\mathcal{S}}}
\def\gT{{\mathcal{T}}}
\def\gU{{\mathcal{U}}}
\def\gV{{\mathcal{V}}}
\def\gW{{\mathcal{W}}}
\def\gX{{\mathcal{X}}}
\def\gY{{\mathcal{Y}}}
\def\gZ{{\mathcal{Z}}}

% Sets
\def\sA{{\mathbb{A}}}
\def\sB{{\mathbb{B}}}
\def\sC{{\mathbb{C}}}
\def\sD{{\mathbb{D}}}
% Don't use a set called E, because this would be the same as our symbol
% for expectation.
\def\sF{{\mathbb{F}}}
\def\sG{{\mathbb{G}}}
\def\sH{{\mathbb{H}}}
\def\sI{{\mathbb{I}}}
\def\sJ{{\mathbb{J}}}
\def\sK{{\mathbb{K}}}
\def\sL{{\mathbb{L}}}
\def\sM{{\mathbb{M}}}
\def\sN{{\mathbb{N}}}
\def\sO{{\mathbb{O}}}
\def\sP{{\mathbb{P}}}
\def\sQ{{\mathbb{Q}}}
\def\sR{{\mathbb{R}}}
\def\sS{{\mathbb{S}}}
\def\sT{{\mathbb{T}}}
\def\sU{{\mathbb{U}}}
\def\sV{{\mathbb{V}}}
\def\sW{{\mathbb{W}}}
\def\sX{{\mathbb{X}}}
\def\sY{{\mathbb{Y}}}
\def\sZ{{\mathbb{Z}}}

% Entries of a matrix
\def\emLambda{{\Lambda}}
\def\emA{{A}}
\def\emB{{B}}
\def\emC{{C}}
\def\emD{{D}}
\def\emE{{E}}
\def\emF{{F}}
\def\emG{{G}}
\def\emH{{H}}
\def\emI{{I}}
\def\emJ{{J}}
\def\emK{{K}}
\def\emL{{L}}
\def\emM{{M}}
\def\emN{{N}}
\def\emO{{O}}
\def\emP{{P}}
\def\emQ{{Q}}
\def\emR{{R}}
\def\emS{{S}}
\def\emT{{T}}
\def\emU{{U}}
\def\emV{{V}}
\def\emW{{W}}
\def\emX{{X}}
\def\emY{{Y}}
\def\emZ{{Z}}
\def\emSigma{{\Sigma}}

% entries of a tensor
% Same font as tensor, without \bm wrapper
\newcommand{\etens}[1]{\mathsfit{#1}}
\def\etLambda{{\etens{\Lambda}}}
\def\etA{{\etens{A}}}
\def\etB{{\etens{B}}}
\def\etC{{\etens{C}}}
\def\etD{{\etens{D}}}
\def\etE{{\etens{E}}}
\def\etF{{\etens{F}}}
\def\etG{{\etens{G}}}
\def\etH{{\etens{H}}}
\def\etI{{\etens{I}}}
\def\etJ{{\etens{J}}}
\def\etK{{\etens{K}}}
\def\etL{{\etens{L}}}
\def\etM{{\etens{M}}}
\def\etN{{\etens{N}}}
\def\etO{{\etens{O}}}
\def\etP{{\etens{P}}}
\def\etQ{{\etens{Q}}}
\def\etR{{\etens{R}}}
\def\etS{{\etens{S}}}
\def\etT{{\etens{T}}}
\def\etU{{\etens{U}}}
\def\etV{{\etens{V}}}
\def\etW{{\etens{W}}}
\def\etX{{\etens{X}}}
\def\etY{{\etens{Y}}}
\def\etZ{{\etens{Z}}}

% Distribution symbols. The subscript labels are set upright with \mathrm
% (the LaTeX2e math alphabet) rather than the obsolete font switch \rm.

% The true underlying data generating distribution
\newcommand{\pdata}{p_{\mathrm{data}}}
% The empirical distribution defined by the training set
\newcommand{\ptrain}{\hat{p}_{\mathrm{data}}}
\newcommand{\Ptrain}{\hat{P}_{\mathrm{data}}}
% The model distribution
\newcommand{\pmodel}{p_{\mathrm{model}}}
\newcommand{\Pmodel}{P_{\mathrm{model}}}
\newcommand{\ptildemodel}{\tilde{p}_{\mathrm{model}}}
% Stochastic autoencoder distributions
\newcommand{\pencode}{p_{\mathrm{encoder}}}
\newcommand{\pdecode}{p_{\mathrm{decoder}}}
\newcommand{\precons}{p_{\mathrm{reconstruct}}}

\newcommand{\laplace}{\mathrm{Laplace}} % Laplace distribution

\newcommand{\E}{\mathbb{E}}
\newcommand{\Ls}{\mathcal{L}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\emp}{\tilde{p}}
\newcommand{\lr}{\alpha}
\newcommand{\reg}{\lambda}
\newcommand{\rect}{\mathrm{rectifier}}
\newcommand{\softmax}{\mathrm{softmax}}
\newcommand{\sigmoid}{\sigma}
\newcommand{\softplus}{\zeta}
\newcommand{\KL}{D_{\mathrm{KL}}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\standarderror}{\mathrm{SE}}
\newcommand{\Cov}{\mathrm{Cov}}
% Wolfram Mathworld says $L^2$ is for function spaces and $\ell^2$ is for vectors
% But then they seem to use $L^2$ for vectors throughout the site, and so does
% wikipedia.
\newcommand{\normlzero}{L^0}
\newcommand{\normlone}{L^1}
\newcommand{\normltwo}{L^2}
\newcommand{\normlp}{L^p}
\newcommand{\normmax}{L^\infty}

\newcommand{\parents}{Pa} % See usage in notation.tex. Chosen to match Daphne's book.

\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

\DeclareMathOperator{\sign}{sign}
\DeclareMathOperator{\Tr}{Tr}
\let\ab\allowbreak

\graphicspath{{figures/}}
\newcommand{\eat}[1]{}

% \Ccal with subscript y (\Ccal is declared further down in this file).
\newcommand{\Cy}{\Ccal_y}
% Bold \ell. NOTE(review): this replaces LaTeX's text command \l --- confirm
% that no text (e.g. author names in the bibliography) needs the original.
\renewcommand{\l}{\boldsymbol{\ell}}
% Replaces \S with the graphic figs/arcsinh at a height of 1em, wrapped in
% negative thin spaces on both sides.
\renewcommand{\S}
{\!\!\,\includegraphics[height=1em]{figs/arcsinh}\!\!}
\def\Gt{\widetilde{G}}
\def\ah{\widehat{a}}
\def\ahb{\mathbf{\ah}}
\def\at{\widetilde{a}}
\def\atb{\mathbf{\at}}
\def\yh{\widehat{y}}
\def\yt{\widetilde{y}}
\def\ytb{\mathbf{\yt}}
\def\yhb{\mathbf{\yh}}
\def\zh{\widehat{z}}
\def\zt{\widetilde{z}}
\def\ztb{\mathbf{\zt}}
\def\zhb{\mathbf{\zh}}
\def\g{\mathbf{g}}
\def\bah{\mathbf{\widehat{a}}}
\RequirePackage{latexsym}
\RequirePackage{amsmath}
%\RequirePackage{amsthm}
\RequirePackage{amssymb}
\RequirePackage{bm}

% Colour helpers: each one colours only its argument.
% \Magenta uses a group around \color so that the colour in force before the
% call is restored afterwards (rather than being forced to black), and so that
% the argument may contain paragraph breaks, e.g. inside \marginpar.
\newcommand{\Magenta}[1]{{\leavevmode\color{magenta}#1}}
\newcommand{\Blue}[1]{\textcolor{blue}{#1}}
%\newcommand{\Green}[1]{\color{OliveGreen}{#1}}
% NOTE(review): the colour mygreen is not defined in the visible part of this
% file --- confirm it is set up elsewhere (e.g. with \definecolor).
\newcommand{\Green}[1]{\textcolor{mygreen}{#1}}
\newcommand{\Red}[1]{\textcolor{red}{#1}}
\newcommand{\Cyan}[1]{\textcolor{cyan}{#1}}
\newcommand{\Orange}[1]{\textcolor{orange}{#1}}
\newcommand{\Brown}[1]{\textcolor{brown}{#1}}
%\newcommand{\Ref}[1]{\hfill\Green{[#1]}}
\DeclareMathOperator{\arginf}{arg\,inf}
%\newcommand{\alert}[1]{\Red{\bf {#1}}}

\newcommand{\rmd}{\mathrm{d}} % use italics only for variable

%\newcommand{\Tr}{^\mathsf{T}} % sf looks nicer than rm


%%% MW %%%%

\newcommand{\mnote}[1]{{\bf\large *}\marginpar{\Magenta{#1}}}
\newcommand{\dnote}[1]{{\bf\large +}\marginpar{\tiny#1}}
\ifx\note\undefined
\newcommand{\note}[1]{{\bf \Magenta{#1}}}
\else
\renewcommand{\note}[1]{{\bf \Magenta{#1}}}
\fi

% Single-letter shorthands: upright bold x, a, y, z, w; bold theta; the reals.
% \def is used, so any earlier meaning of these names is replaced silently.
% NOTE(review): \a and \t are also LaTeX kernel commands (accent helpers);
% redefining them may affect text that relies on the originals, e.g. accented
% names in the bibliography --- confirm.
\def\x{\mathbf{x}}
\def\a{\mathbf{a}}
\def\y{\mathbf{y}}
\def\z{\mathbf{z}}
\def\w{\mathbf{w}}
\def\t{\boldsymbol{\theta}}
% Same body as the \newcommand{\R} given earlier in this file.
\def\R{\mathbb{R}}

%%%%%%%% Stock standard definitions %%%%%%%%%%%%%%%

%\DeclareMathOperator{\ab}{\mathbf{a}}
\DeclareMathOperator{\bb}{\mathbf{b}}
\DeclareMathOperator{\cbb}{\mathbf{c}}
\DeclareMathOperator{\db}{\mathbf{d}}
\DeclareMathOperator{\eb}{\mathbf{e}}
\DeclareMathOperator{\fb}{\mathbf{f}}
\DeclareMathOperator{\gb}{\mathbf{g}}
\DeclareMathOperator{\hb}{\mathbf{h}}
\DeclareMathOperator{\ib}{\mathbf{i}}
\DeclareMathOperator{\jb}{\mathbf{j}}
\DeclareMathOperator{\kb}{\mathbf{k}}
\DeclareMathOperator{\lb}{\mathbf{l}}
\DeclareMathOperator{\mb}{\mathbf{m}}
\DeclareMathOperator{\nbb}{\mathbf{n}}
\DeclareMathOperator{\ob}{\mathbf{o}}
\DeclareMathOperator{\pb}{\mathbf{p}}
\DeclareMathOperator{\qb}{\mathbf{q}}
\DeclareMathOperator{\sbb}{\mathbf{s}}
\DeclareMathOperator{\tb}{\mathbf{t}}
\DeclareMathOperator{\ub}{\mathbf{u}}
\DeclareMathOperator{\wb}{\mathbf{w}}
\DeclareMathOperator{\xb}{\mathbf{x}}
\DeclareMathOperator{\yb}{\mathbf{y}}
\DeclareMathOperator{\zb}{\mathbf{z}}

\DeclareMathOperator{\pr}{\mathrm{p}}

\DeclareMathOperator{\deltab}{\boldsymbol{\delta}}

% Lower-case bold letters carrying a tilde accent.
% a, r and v spell out \mathbf{...} directly because \ab, \rb and \vb mean
% something else in this file (\allowbreak, the random variable b and the
% vector b, respectively). c, n and s use the bold letters \cbb, \nbb and \sbb.
\DeclareMathOperator{\atilde}{\tilde{\mathbf{a}}}
\DeclareMathOperator{\btilde}{\tilde{\bb}}
\DeclareMathOperator{\ctilde}{\tilde{\cbb}}
\DeclareMathOperator{\dtilde}{\tilde{\db}}
\DeclareMathOperator{\etilde}{\tilde{\eb}}
\DeclareMathOperator{\ftilde}{\tilde{\fb}}
\DeclareMathOperator{\gtilde}{\tilde{\gb}}
\DeclareMathOperator{\htilde}{\tilde{\hb}}
\DeclareMathOperator{\itilde}{\tilde{\ib}}
\DeclareMathOperator{\jtilde}{\tilde{\jb}}
\DeclareMathOperator{\ktilde}{\tilde{\kb}}
\DeclareMathOperator{\ltilde}{\tilde{\lb}}
\DeclareMathOperator{\mtilde}{\tilde{\mb}}
\DeclareMathOperator{\ntilde}{\tilde{\nbb}}
\DeclareMathOperator{\otilde}{\tilde{\ob}}
% (no \ptilde is declared in this list)
\DeclareMathOperator{\qtilde}{\tilde{\qb}}
\DeclareMathOperator{\rtilde}{\tilde{\mathbf{r}}}
\DeclareMathOperator{\stilde}{\tilde{\sbb}}
\DeclareMathOperator{\ttilde}{\tilde{\tb}}
\DeclareMathOperator{\utilde}{\tilde{\ub}}
\DeclareMathOperator{\vtilde}{\tilde{\mathbf{v}}}
\DeclareMathOperator{\wtilde}{\tilde{\wb}}
\DeclareMathOperator{\xtilde}{\tilde{\xb}}
\DeclareMathOperator{\ytilde}{\tilde{\yb}}
\DeclareMathOperator{\ztilde}{\tilde{\zb}}

% Lower-case bold letters carrying a bar accent.
% a, r and v spell out \mathbf{...} directly because \ab, \rb and \vb mean
% something else in this file (\allowbreak, the random variable b and the
% vector b, respectively). c, n and s use the bold letters \cbb, \nbb and \sbb.
% The h entry is named \hbbar, not \hbar.
\DeclareMathOperator{\abar}{\bar{\mathbf{a}}}
\DeclareMathOperator{\bbar}{\bar{\bb}}
\DeclareMathOperator{\cbar}{\bar{\cbb}}
\DeclareMathOperator{\dbar}{\bar{\db}}
\DeclareMathOperator{\ebar}{\bar{\eb}}
\DeclareMathOperator{\fbar}{\bar{\fb}}
\DeclareMathOperator{\gbar}{\bar{\gb}}
\DeclareMathOperator{\hbbar}{\bar{\hb}}
\DeclareMathOperator{\ibar}{\bar{\ib}}
\DeclareMathOperator{\jbar}{\bar{\jb}}
\DeclareMathOperator{\kbar}{\bar{\kb}}
\DeclareMathOperator{\lbar}{\bar{\lb}}
\DeclareMathOperator{\mbar}{\bar{\mb}}
\DeclareMathOperator{\nbar}{\bar{\nbb}}
\DeclareMathOperator{\obar}{\bar{\ob}}
\DeclareMathOperator{\pbar}{\bar{\pb}}
\DeclareMathOperator{\qbar}{\bar{\qb}}
\DeclareMathOperator{\rbar}{\bar{\mathbf{r}}}
\DeclareMathOperator{\sbar}{\bar{\sbb}}
\DeclareMathOperator{\tbar}{\bar{\tb}}
\DeclareMathOperator{\ubar}{\bar{\ub}}
\DeclareMathOperator{\vbar}{\bar{\mathbf{v}}}
\DeclareMathOperator{\wbar}{\bar{\wb}}
\DeclareMathOperator{\xbar}{\bar{\xb}}
\DeclareMathOperator{\ybar}{\bar{\yb}}
\DeclareMathOperator{\zbar}{\bar{\zb}}

\DeclareMathOperator{\Ab}{\mathbf{A}}
\DeclareMathOperator{\Bb}{\mathbf{B}}
\DeclareMathOperator{\Cb}{\mathbf{C}}
\DeclareMathOperator{\Db}{\mathbf{D}}
\DeclareMathOperator{\Eb}{\mathbf{E}}
\DeclareMathOperator{\Fb}{\mathbf{F}}
\DeclareMathOperator{\Gb}{\mathbf{G}}
\DeclareMathOperator{\Hb}{\mathbf{H}}
\DeclareMathOperator{\Ib}{\mathbf{I}}
\DeclareMathOperator{\Jb}{\mathbf{J}}
\DeclareMathOperator{\Kb}{\mathbf{K}}
\DeclareMathOperator{\Lb}{\mathbf{L}}
\DeclareMathOperator{\Mb}{\mathbf{M}}
\DeclareMathOperator{\Nb}{\mathbf{N}}
\DeclareMathOperator{\Ob}{\mathbf{O}}
\DeclareMathOperator{\Pb}{\mathbf{P}}
\DeclareMathOperator{\Qb}{\mathbf{Q}}
\DeclareMathOperator{\Rb}{\mathbf{R}}
\DeclareMathOperator{\Sbb}{\mathbf{S}}
\DeclareMathOperator{\Tb}{\mathbf{T}}
\DeclareMathOperator{\Ub}{\mathbf{U}}
\DeclareMathOperator{\Vb}{\mathbf{V}}
\DeclareMathOperator{\Wb}{\mathbf{W}}
\DeclareMathOperator{\Xb}{\mathbf{X}}
\DeclareMathOperator{\Yb}{\mathbf{Y}}
\DeclareMathOperator{\Zb}{\mathbf{Z}}

\DeclareMathOperator{\Abar}{\bar{A}}
\DeclareMathOperator{\Bbar}{\bar{B}}
\DeclareMathOperator{\Cbar}{\bar{C}}
\DeclareMathOperator{\Dbar}{\bar{D}}
\DeclareMathOperator{\Ebar}{\bar{E}}
\DeclareMathOperator{\Fbar}{\bar{F}}
\DeclareMathOperator{\Gbar}{\bar{G}}
\DeclareMathOperator{\Hbar}{\bar{H}}
\DeclareMathOperator{\Ibar}{\bar{I}}
\DeclareMathOperator{\Jbar}{\bar{J}}
\DeclareMathOperator{\Kbar}{\bar{K}}
\DeclareMathOperator{\Lbar}{\bar{L}}
\DeclareMathOperator{\Mbar}{\bar{M}}
\DeclareMathOperator{\Nbar}{\bar{N}}
\DeclareMathOperator{\Obar}{\bar{O}}
\DeclareMathOperator{\Pbar}{\bar{P}}
\DeclareMathOperator{\Qbar}{\bar{Q}}
\DeclareMathOperator{\Rbar}{\bar{R}}
\DeclareMathOperator{\Sbar}{\bar{S}}
\DeclareMathOperator{\Tbar}{\bar{T}}
\DeclareMathOperator{\Ubar}{\bar{U}}
\DeclareMathOperator{\Vbar}{\bar{V}}
\DeclareMathOperator{\Wbar}{\bar{W}}
\DeclareMathOperator{\Xbar}{\bar{X}}
\DeclareMathOperator{\Ybar}{\bar{Y}}

\DeclareMathOperator{\Abbar}{\bar{\Ab}}
\DeclareMathOperator{\Bbbar}{\bar{\Bb}}
\DeclareMathOperator{\Cbbar}{\bar{\Cb}}
\DeclareMathOperator{\Dbbar}{\bar{\Db}}
\DeclareMathOperator{\Ebbar}{\bar{\Eb}}
\DeclareMathOperator{\Fbbar}{\bar{\Fb}}
\DeclareMathOperator{\Gbbar}{\bar{\Gb}}
\DeclareMathOperator{\Hbbar}{\bar{\Hb}}
\DeclareMathOperator{\Ibbar}{\bar{\Ib}}
\DeclareMathOperator{\Jbbar}{\bar{\Jb}}
\DeclareMathOperator{\Kbbar}{\bar{\Kb}}
\DeclareMathOperator{\Lbbar}{\bar{\Lb}}
\DeclareMathOperator{\Mbbar}{\bar{\Mb}}
\DeclareMathOperator{\Nbbar}{\bar{\Nb}}
\DeclareMathOperator{\Obbar}{\bar{\Ob}}
\DeclareMathOperator{\Pbbar}{\bar{\Pb}}
\DeclareMathOperator{\Qbbar}{\bar{\Qb}}
\DeclareMathOperator{\Rbbar}{\bar{\Rb}}
\DeclareMathOperator{\Sbbar}{\bar{\Sbb}} % bold S is named \Sbb in this file
\DeclareMathOperator{\Tbbar}{\bar{\Tb}}
\DeclareMathOperator{\Ubbar}{\bar{\Ub}}
\DeclareMathOperator{\Vbbar}{\bar{\Vb}}
\DeclareMathOperator{\Wbbar}{\bar{\Wb}}
\DeclareMathOperator{\Xbbar}{\bar{\Xb}}
\DeclareMathOperator{\Ybbar}{\bar{\Yb}}
\DeclareMathOperator{\Zbbar}{\bar{\Zb}}

\DeclareMathOperator{\Ahat}{\widehat{A}}
\DeclareMathOperator{\Bhat}{\widehat{B}}
\DeclareMathOperator{\Chat}{\widehat{C}}
\DeclareMathOperator{\Dhat}{\widehat{D}}
\DeclareMathOperator{\Ehat}{\widehat{E}}
\DeclareMathOperator{\Fhat}{\widehat{F}}
\DeclareMathOperator{\Ghat}{\widehat{G}}
\DeclareMathOperator{\Hhat}{\widehat{H}}
\DeclareMathOperator{\Ihat}{\widehat{I}}
\DeclareMathOperator{\Jhat}{\widehat{J}}
\DeclareMathOperator{\Khat}{\widehat{K}}
\DeclareMathOperator{\Lhat}{\widehat{L}}
\DeclareMathOperator{\Mhat}{\widehat{M}}
\DeclareMathOperator{\Nhat}{\widehat{N}}
\DeclareMathOperator{\Ohat}{\widehat{O}}
\DeclareMathOperator{\Phat}{\widehat{P}}
\DeclareMathOperator{\Qhat}{\widehat{Q}}
\DeclareMathOperator{\Rhat}{\widehat{R}}
\DeclareMathOperator{\Shat}{\widehat{S}}
\DeclareMathOperator{\That}{\widehat{T}}
\DeclareMathOperator{\Uhat}{\widehat{U}}
\DeclareMathOperator{\Vhat}{\widehat{V}}
\DeclareMathOperator{\What}{\widehat{W}}
\DeclareMathOperator{\Xhat}{\widehat{X}}
\DeclareMathOperator{\Yhat}{\widehat{Y}}
\DeclareMathOperator{\Zhat}{\widehat{Z}}

\DeclareMathOperator{\Abhat}{\widehat{\Ab}}
\DeclareMathOperator{\Bbhat}{\widehat{\Bb}}
\DeclareMathOperator{\Cbhat}{\widehat{\Cb}}
\DeclareMathOperator{\Dbhat}{\widehat{\Db}}
\DeclareMathOperator{\Ebhat}{\widehat{\Eb}}
\DeclareMathOperator{\Fbhat}{\widehat{\Fb}}
\DeclareMathOperator{\Gbhat}{\widehat{\Gb}}
\DeclareMathOperator{\Hbhat}{\widehat{\Hb}}
\DeclareMathOperator{\Ibhat}{\widehat{\Ib}}
\DeclareMathOperator{\Jbhat}{\widehat{\Jb}}
\DeclareMathOperator{\Kbhat}{\widehat{\Kb}}
\DeclareMathOperator{\Lbhat}{\widehat{\Lb}}
\DeclareMathOperator{\Mbhat}{\widehat{\Mb}}
\DeclareMathOperator{\Nbhat}{\widehat{\Nb}}
\DeclareMathOperator{\Obhat}{\widehat{\Ob}}
\DeclareMathOperator{\Pbhat}{\widehat{\Pb}}
\DeclareMathOperator{\Qbhat}{\widehat{\Qb}}
\DeclareMathOperator{\Rbhat}{\widehat{\Rb}}
\DeclareMathOperator{\Sbhat}{\widehat{\Sbb}} % bold S is named \Sbb in this file
\DeclareMathOperator{\Tbhat}{\widehat{\Tb}}
\DeclareMathOperator{\Ubhat}{\widehat{\Ub}}
\DeclareMathOperator{\Vbhat}{\widehat{\Vb}}
\DeclareMathOperator{\Wbhat}{\widehat{\Wb}}
\DeclareMathOperator{\Xbhat}{\widehat{\Xb}}
\DeclareMathOperator{\Ybhat}{\widehat{\Yb}}
\DeclareMathOperator{\Zbhat}{\widehat{\Zb}}

\DeclareMathOperator{\Acal}{\mathcal{A}}
\DeclareMathOperator{\Bcal}{\mathcal{B}}
\DeclareMathOperator{\Ccal}{\mathcal{C}}
\DeclareMathOperator{\Dcal}{\mathcal{D}}
\DeclareMathOperator{\Ecal}{\mathcal{E}}
\DeclareMathOperator{\Fcal}{\mathcal{F}}
\DeclareMathOperator{\Gcal}{\mathcal{G}}
\DeclareMathOperator{\Hcal}{\mathcal{H}}
\DeclareMathOperator{\Ical}{\mathcal{I}}
\DeclareMathOperator{\Jcal}{\mathcal{J}}
\DeclareMathOperator{\Kcal}{\mathcal{K}}
\DeclareMathOperator{\Lcal}{\mathcal{L}}
\DeclareMathOperator{\Mcal}{\mathcal{M}}
\DeclareMathOperator{\Ncal}{\mathcal{N}}
\DeclareMathOperator{\Ocal}{\mathcal{O}}
\DeclareMathOperator{\Pcal}{\mathcal{P}}
\DeclareMathOperator{\Qcal}{\mathcal{Q}}
\DeclareMathOperator{\Rcal}{\mathcal{R}}
\DeclareMathOperator{\Scal}{\mathcal{S}}
\DeclareMathOperator{\Tcal}{\mathcal{T}}
\DeclareMathOperator{\Ucal}{\mathcal{U}}
\DeclareMathOperator{\Vcal}{\mathcal{V}}
\DeclareMathOperator{\Wcal}{\mathcal{W}}
\DeclareMathOperator{\Xcal}{\mathcal{X}}
\DeclareMathOperator{\Ycal}{\mathcal{Y}}
\DeclareMathOperator{\Zcal}{\mathcal{Z}}

\DeclareMathOperator{\Atilde}{\widetilde{A}}
\DeclareMathOperator{\Btilde}{\widetilde{B}}
\DeclareMathOperator{\Ctilde}{\widetilde{C}}
\DeclareMathOperator{\Dtilde}{\widetilde{D}}
\DeclareMathOperator{\Etilde}{\widetilde{E}}
\DeclareMathOperator{\Ftilde}{\widetilde{F}}
\DeclareMathOperator{\Gtilde}{\widetilde{G}}
\DeclareMathOperator{\Htilde}{\widetilde{H}}
\DeclareMathOperator{\Itilde}{\widetilde{I}}
\DeclareMathOperator{\Jtilde}{\widetilde{J}}
\DeclareMathOperator{\Ktilde}{\widetilde{K}}
\DeclareMathOperator{\Ltilde}{\widetilde{L}}
\DeclareMathOperator{\Mtilde}{\widetilde{M}}
\DeclareMathOperator{\Ntilde}{\widetilde{N}}
\DeclareMathOperator{\Otilde}{\widetilde{O}}
\DeclareMathOperator{\Ptilde}{\widetilde{P}}
\DeclareMathOperator{\Qtilde}{\widetilde{Q}}
\DeclareMathOperator{\Rtilde}{\widetilde{R}}
\DeclareMathOperator{\Stilde}{\widetilde{S}}
\DeclareMathOperator{\Ttilde}{\widetilde{T}}
\DeclareMathOperator{\Utilde}{\widetilde{U}}
\DeclareMathOperator{\Vtilde}{\widetilde{V}}
\DeclareMathOperator{\Wtilde}{\widetilde{W}}
\DeclareMathOperator{\Xtilde}{\widetilde{X}}
\DeclareMathOperator{\Ytilde}{\widetilde{Y}}
\DeclareMathOperator{\Ztilde}{\widetilde{Z}}


%%%%%%%% Widely accepted definitions %%%%%%%%%%%%%%%

\DeclareMathOperator{\CC}{\mathbb{C}} % Complex numbers
\DeclareMathOperator{\EE}{\mathbb{E}} % Expectation
\DeclareMathOperator{\KK}{\mathbb{K}} % Arbitrary field
\DeclareMathOperator{\MM}{\mathbb{M}} % Median
\DeclareMathOperator{\NN}{\mathbb{N}} % Natural numbers
\DeclareMathOperator{\PP}{\mathbb{P}} % Probability
\DeclareMathOperator{\QQ}{\mathbb{Q}} % Rationals
\DeclareMathOperator{\RR}{\mathbb{R}} % Real numbers
\DeclareMathOperator{\ZZ}{\mathbb{Z}} % Integers

\DeclareMathOperator{\one}{\mathbf{1}}  % Identity
\DeclareMathOperator{\zero}{\mathbf{0}} % Zero

% Named operators. \DeclareMathOperator already sets its text upright and
% wraps it as an operator, so the names are given as plain text.

% Starred form: sub/superscripts are placed as limits in display style.
\DeclareMathOperator*{\mini}{minimize}
\DeclareMathOperator*{\maxi}{maximize}

\DeclareMathOperator*{\argsup}{argsup}
\DeclareMathOperator*{\arcsinh}{arcsinh}
\DeclareMathOperator*{\limit}{limit}
% Unstarred form: sub/superscripts stay beside the operator name.
\DeclareMathOperator{\sgn}{sign}
\DeclareMathOperator{\tr}{tr}
\DeclareMathOperator{\rank}{rank}
\DeclareMathOperator{\traj}{Traj}
\DeclareMathOperator{\diag}{diag}

%%%%%%%% Bold Greek Letters %%%%%%%%%%%%%%%
\newcommand{\sigmab}{\bm{\sigma}}
\newcommand{\Sigmab}{\mathbf{\Sigma}}
\newcommand{\Thetab}{{\bm{\Theta}}}
\newcommand{\thetab}{{\bm{\theta}}}
\newcommand{\xib}{{\bm{\xi}}}
\newcommand{\Xib}{{\bm{\Xi}}}
\newcommand{\zetab}{{\bm{\zeta}}}
\newcommand{\alphab}{{\bm{\alpha}}}
\newcommand{\taub}{{\bm{\tau}}}
\newcommand{\etab}{{\bm{\eta}}}


\DeclareMathOperator{\alphahat}{\widehat{\alpha}}
%%%%%%%% Mess around with LaTeX %%%%%%%%%%%%%%%

%% Some style files might actually define these variables.
%% So don't mess with them if they are already defined

\ifx\BlackBox\undefined
\newcommand{\BlackBox}{\rule{1.5ex}{1.5ex}}  % end of proof
\fi

\ifx\proof\undefined
\newenvironment{proof}{\par\noindent{\bf Proof\ }}{\hfill\BlackBox\\[2mm]}
\fi

\ifx\theorem\undefined
\newtheorem{theorem}{Theorem}[section]
\fi
\ifx\example\undefined
\newtheorem{example}{Example}
\fi
\ifx\lemma\undefined
\newtheorem{lemma}{Lemma}[section]
\fi

\newtheorem{assumption}{Assumption}[section]

\ifx\corollary\undefined
\newtheorem{corollary}{Corollary}[section]
\fi


\ifx\definition\undefined
\newtheorem{definition}[theorem]{Definition}
\fi

\ifx\proposition\undefined
\newtheorem{proposition}[theorem]{Proposition}
\fi

\ifx\remark\undefined
\newtheorem{remark}[theorem]{Remark}
\fi

\ifx\conjecture\undefined
\newtheorem{conjecture}[theorem]{Conjecture}
\fi

\ifx\factoid\undefined
\newtheorem{factoid}[theorem]{Fact}
\fi

\ifx\axiom\undefined
\newtheorem{axiom}[theorem]{Axiom}
\fi

%%%%%%%% Utility functions %%%%%%%%%%%%%%%

\newcommand{\eq}[1]{(\ref{#1})}
\newcommand{\mymatrix}[2]{\left[\begin{array}{#1} #2 \end{array}\right]}
\newcommand{\mychoose}[2]{\left(\begin{array}{c} #1 \\ #2 \end{array}\right)}
\newcommand{\mydet}[1]{\det\left[ #1 \right]}
\newcommand{\sembrack}[1]{[\![#1]\!]}

%brackets
\newcommand{\inner}[2]{\left\langle #1,#2 \right\rangle}
\newcommand{\rbr}[1]{\left(#1\right)}
\newcommand{\sbr}[1]{\left[#1\right]}
\newcommand{\cbr}[1]{\left\{#1\right\}}
\newcommand{\nbr}[1]{\left\|#1\right\|}
\newcommand{\abr}[1]{\left|#1\right|}

% "et al." in emphasis, followed by a space. The control space (\ ) keeps the
% gap after the abbreviation's period at normal inter-word width instead of
% end-of-sentence width.
\newcommand{\ea}{\emph{et al.\ }}

\newcommand{\vishy}[1]{{\bf[#1--VISHY]}}
\newcommand{\tim}[1]{{\bf[#1 --TIM]}}

%Logic
%\newcommand{\implies}{\Rightarrow}
%\newcommand{\iff}{\Leftrightarrow}
\newcommand{\goesto}{\rightarrow}
% The words "such that" with a space on either side. \ensuremath lets the
% macro be used both inside and outside math mode; \text comes from amsmath.
\newcommand{\suchthat}{\ensuremath{\text{ such that }}}
% Short alias for \suchthat.
\newcommand{\st}{\suchthat}

%%%%%%%% Specific symbols for this project %%%%%%%%%%%%%%%

% upright names for matrices --don't use for single-letter function names!
\newcommand{\A}{\mathbf{A}}
\newcommand{\As}{\mathbf{A}^{\!\!*}}
\newcommand{\B}{\mathbf{B}}
\newcommand{\Bs}{\mathbf{B}^{*}}

% p and variants
\newcommand{\p}{\mathbf{p}}
\newcommand{\pp}{\p^{\prime}}
\newcommand{\ps}{\p^*}
\newcommand{\po}{\p_0}
\newcommand{\pos}{\p_{0}^{*}}
\newcommand{\poi}{[\p_{0}]_i}
\newcommand{\pii}{\p_{i}}
\newcommand{\pip}{\p_{i}^{\prime}}
\newcommand{\pis}{\p_{i}^{*}}
\newcommand{\pBar}{\bar{\p}}
\newcommand{\pohat}{\widehat{\p}_0}
\newcommand{\phat}{\widehat{\p}}
\newcommand{\psBar}{\bar{\p}^{*}}
\newcommand{\pbb}{\p_{\bb}}

% mu and variants
% NOTE(review): \ms, \mo, \mh, \mBar, \msBar and \mBBar all expand to \m, which
% is not defined in the visible part of this file --- confirm it is defined
% elsewhere (the heading suggests a bold mu, cf. \mub); otherwise using any of
% them stops with an undefined-control-sequence error.
\newcommand{\mub}{{\bm{\mu}}}
\newcommand{\ms}{\m^{*}}
\newcommand{\mo}{\m_0}
\newcommand{\mh}{\widehat{\m}}
\newcommand{\mBar}{\bar{\m}}
\newcommand{\msBar}{\bar{\m}^{*}}

% Bold mu_B
\newcommand{\mBBar}{\bar{\m}_{\B}}
% mu_1
\newcommand{\mone}{\mu_{\mathbf{1}}}
\newcommand{\mones}{\mu_{\mathbf{1}}^{*}}
\newcommand{\monesBar}{\bar{\mu}^{*}_\mathbf{1}}

% q and variants
\newcommand{\q}{\mathbf{q}}
\newcommand{\qs}{\q^{*}}
\newcommand{\qii}{\q_{i}}

% b and variants
\newcommand{\bone}{\mathbf{b_1}}
%\newcommand{\bone}{\ones}

% c and variants
\newcommand{\cs}{\cbb^{*}}

% Functions
\newcommand{\Fs}{F^{*}}
\newcommand{\Gs}{G^{*}}
\newcommand{\Hs}{H^{*}}
\newcommand{\fsi}{f_{i}^{*}}

% Dual Spaces
% Declared with \DeclareMathOperator, so they are typeset as operators
% (operator font and spacing) rather than as ordinary symbols.
% NOTE(review): \Xcal and \Mcal are not defined in this part of the file.
\DeclareMathOperator{\Xcals}{\Xcal^{*}}
\DeclareMathOperator{\Mcals}{\Mcal^{*}}

% set and domain concepts
% Named operators; the explicit \mathrm wraps each name inside the operator.
\DeclareMathOperator{\intr}{\mathrm{int}}   % Interior
\DeclareMathOperator{\bd}{\mathrm{bdry}}      % Boundary (see RocWet98)
\DeclareMathOperator{\dom}{\mathrm{dom}}    % Effective Domain
\DeclareMathOperator{\epi}{\mathrm{epi}}    % Epigraph
\DeclareMathOperator{\ran}{\mathrm{range}}  % Range of an operator

% Alias for \times.
\newcommand{\by}{\times}

% gradient: \grad{F}{\m}
% #1 is the function, #2 the point; the point is wrapped in auto-sized
% parentheses. \hess is the same with \nabla^2.
\newcommand{\grad}[2]{\nabla #1 \left( #2 \right) }
\newcommand{\hess}[2]{\nabla^2 #1 \left( #2 \right) }
\newcommand{\grd}{\nabla} % simpler version!
\newcommand{\breg}[1]{\Delta #1} % Bregman divergence

% subgradient: \subgrad{F}{\m}
% Same argument layout as \grad, with \partial in place of \nabla.
\newcommand{\subgrad}[2]{\partial #1 \left( #2 \right) }
% subdifferential
\newcommand{\subdiff}{\partial}

% Vector of all ones
% NOTE(review): \eb has no active definition in this part of the file (a
% commented-out one appears further down) -- presumably defined earlier.
\newcommand{\ones}{\eb}

% Sum from i equal 1 to n
\newcommand{\sumin}{\sum_{i=1}^{n}}

% Deformed logarithm wrt \phi
\newcommand{\lphi}{\log_{\phi}}

% function k_{\phi}
\newcommand{\kphi}{k_{\phi}}

% reparam for dual log
% \phib is currently a poor-man's-bold phi (\pmb); the bullet-superscript
% variant is kept commented out. \psib still uses the bullet superscript.
%\newcommand{\phib}{\phi^{\bullet}}
\newcommand{\phib}{\pmb{\phi}}
\newcommand{\psib}{\psi^{\bullet}}

% Deformed exponential wrt \phi
\newcommand{\ephi}{\exp_{\phi}}

% One over deformed exponential wrt \phi
% Typeset as exp with a bullet superscript and phi subscript.
\newcommand{\ephib}{\exp^{\bullet}_{\phi}}

% Gradient of deformed exponential wrt \phi
% The gradient is taken with respect to x (\nabla_{x}).
\newcommand{\ephip}{\nabla_{x} \exp_{\phi}}

% Gradient of t-exponential wrt \phi
% The body uses the subscript t, not phi: \nabla_{x} \exp_{t}.
\newcommand{\ephit}{\nabla_{x} \exp_{t}}

% Deformed logarithm wrt \psi
\newcommand{\lpsi}{\log_{\psi}}

% One over deformed logarithm wrt \psi
% Bullet-superscript logs for psi and for phi.
\newcommand{\lpsib}{\log^{\bullet}_{\psi}}
\newcommand{\lphib}{\log^{\bullet}_{\phi}}

%composition needed for conjugate--formerly combo
% Expands to \lpsi \circ \ephib.
\newcommand{\lcirce}{\lpsi \circ \ephib}

% S and related
% Upper-case S_phi and lower-case s_phi, each with starred (and, for s,
% primed) variants.
\newcommand{\Sp}{S_{\phi}}
\newcommand{\Sps}{S^{*}_{\phi}}
\newcommand{\smp}{s_{\phi}}
\newcommand{\smpp}{s^{\prime}_{\phi}}
\newcommand{\smps}{s^{*}_{\phi}}

% T
% NOTE: \T is redefined further down in this file with \def (as \hat{T});
% that later definition is the one in effect in the document body.
\newcommand{\T}{T^{*}}
% Tilde- and hat-decorated symbols. They are declared as math operators, so
% they are typeset with operator font and spacing.
\DeclareMathOperator{\tp}{\tilde{p}}
\DeclareMathOperator{\tq}{\tilde{q}}
\DeclareMathOperator{\tg}{\tilde{g}}
\DeclareMathOperator{\tmu}{\tilde{\mu}}
\DeclareMathOperator{\tthe}{\tilde{\theta}}
\DeclareMathOperator{\ttheb}{{\bf \tilde{\theta}}}
% NOTE(review): \Mcal is not defined in this part of the file.
\DeclareMathOperator{\tmc}{\widehat{\Mcal}}
\DeclareMathOperator{\tv}{\tilde{v}}


% R with a textual "emp" subscript.
\DeclareMathOperator{\Remp}{R_{\text{emp}}}

% Operator printing the word "tsallis".
\DeclareMathOperator{\tsallis}{\text{tsallis}}

%\newcommand{\inner}[2]{\left\langle #1,#2 \right\rangle}
%\newcommand{\rbr}[1]{\left(#1\right)}
%\newcommand{\cbr}[1]{\left\{#1\right\}}
%\newcommand{\nbr}[1]{\left\|#1\right\|}
%\newcommand{\abr}[1]{\left|#1\right|}
% Hatted a, and the same wrapped in \mathbf. Both use \def, which overwrites
% any existing definition without warning.
\def\ah{\widehat{a}}
\def\ahb{\mathbf{\ah}}
%\newcommand{\sbr}[1]{\left[#1\right]}
%\DeclareMathOperator*{\arginf}{\mathop{\mathrm{arginf}}}
% Bold "ln" as a starred operator (subscripts go underneath in display style).
\DeclareMathOperator*{\lnm}{\mathop{\mathbf{ln}}}
% Reassign \Theta and \theta as math characters: class 7 (variable family),
% family 1, slots "02 and "12.
\mathchardef\Theta="7102 \mathchardef\theta="7112
% NOTE: \I is redefined further down with \renewcommand (as \vect{I}).
\newcommand{\I}{{\mathcal{I}}}
% \top with negative thin space on both sides.
\newcommand{\tops}{{\!\top\!}}
%\newcommand{\R}{\hbox{\bf R}}
%\newcommand{\z}{{\mathbf{z}}}
\newcommand{\vxi}{{\boldsymbol{\xi}}}
\newcommand{\valpha}{{\boldsymbol{\alpha}}}
% NOTE: \m is redefined further down with \renewcommand (as \vect{\mu}).
\newcommand{\m}{{\mathbf{m}}}
% Shorthand for \textcolor{<color>}{<text>}.
\newcommand{\tc}[2]{\textcolor{#1}{#2}}
% Replaces the existing \k with a plain letter k.
\renewcommand{\k}{k}
% Make \br switch the current color to red, whether or not \br already
% exists: redefine it when it is defined, create it otherwise.
\ifdefined\br
\renewcommand{\br}{\color{red}}
\else
\newcommand{\br}{\color{red}}
\fi
%\newcommand{\bb}{\color{blue}}
% Color switches (declarations, not commands with an argument).
\newcommand{\brh}{\color{red}}
\newcommand{\bbh}{\color{blue}}
% Bold N in an \hbox; \Nl is a plain N.
\newcommand{\N}{\hbox{\bf N}}
\newcommand{\Nl}{N}
\newcommand{\Proj}{P} 
\newcommand{\Hc}{{H}}             % base hypothesis class
% Bold upright letters via \def. \x, \y and \w are redefined further down
% with \renewcommand (through \vect).
\def\x{{\mathbf{x}}} 
\def\X{{\mathbf{X}}} 
\def\Xr{{\rm\sf X}} 
\def\y{{\mathbf{y}}} 
\def\w{{\mathbf{w}}} 
\def\W{{\mathbf{W}}} 
\def\RR{{\mathbf{R}}} 
\def\NN{{\mathbf{N}}} 
\newcommand{\M}{{\boldsymbol{\mu}}}
\newcommand{\ev}{\hbox{\bf v}}
\newcommand{\EV}{\hbox{\bf V}}
% Calligraphic letters. \def\H replaces LaTeX's \H accent command without
% any warning.
\def\H{{\cal{H}}}
\def\F{{\cal{F}}}
\def\Fc{{S}}
% \Sample expands to \Z, which is defined further down in this file.
\def\Sample{{\Z}}
\def\vc{{d}}
\def\D{{\cal{D}}} 
\def\hatD{\hat{D}} 
% NOTE: these \def lines silently replace the earlier \newcommand
% definitions of \A, \B and \T made higher up in this file.
\def\A{\hat{A}} 
\def\B{\hat{B}} 
\def\T{\hat{T}} 
\def\G{\mathcal{G}} 
% \def\P replaces LaTeX's \P (pilcrow) command without any warning.
\def\P{\hat{P}} 
\newcommand{\al}{\alpha}
%\newcommand{\Fs}{{\mathcal{F}}}   % feature space
% Bold Greek letters.
\newcommand{\AL}{{\boldsymbol \alpha}}
\newcommand{\BE}{{\boldsymbol \beta}}
\newcommand{\XI}{{\boldsymbol \xi}}
%\newcommand{\sgn}{\mbox{sgn}}
\def\one{{\mathbf{1}}}
\def\hatf{\hat{f}}


%\definecolor{Red}{rgb}{0.75,0,0}
%\definecolor{Blue}{rgb}{0,0,0.75}
%\definecolor{Green}{rgb}{0,0.75,0}
%\definecolor{Black}{rgb}{0,0,0}
%\definecolor{Orange}{rgb}{0.9,0.5,0}

%\newcommand{\Blue}[1]{\textcolor{blue}{#1}}
%\newcommand{\Green}[1]{\color{OliveGreen}{#1}}
%\newcommand{\Green}[1]{\textcolor{mygreen}{#1}}
%\newcommand{\Red}[1]{\textcolor{red}{#1}}
%\newcommand{\Orange}[1]{\textcolor{orange}{#1}}

%\newcommand{\argmin}{\mathop{\rm argmin}}
%\newcommand{\tr}{\mathop{\rm tr}}
%\newcommand{\argmax}{\mathop{\rm argmax}}
% Upright "I" as a math operator.
\newcommand{\Ind}{\mathop{\rm I}}
% Bold d; NOTE: \d is redefined again further down in this file (as \Delta).
\renewcommand{\d}{{\bf d}}
% \l now expands to \Nl.
\renewcommand{\l}{\Nl}
%\newcommand{\Hcal}{\mathcal{H}}
% Bold Z. The extra brace group confines \bf to the letter; without it the
% font switch leaked into whatever followed a bare \Z.
\newcommand{\Z}{{\bf Z}}

% Aliases for \rho and \varrho.
\newcommand{\mg}{{\rho}}
\newcommand{\MG}{{\varrho}}
% The fraction one half.
\newcommand{\half}{\frac{1}{2}}
% c with a tilde.
\newcommand{\ct}{{\tilde{c}}}

%\newtheorem{example}{Example} 
%\newtheorem{remark}[example]{Remark}
%\newtheorem{theorem}[example]{Theorem} 
%\newtheorem{lemma}[example]{Lemma} 
%\newtheorem{proposition}[example]{Proposition} 
%\newtheorem{corollary}[example]{Corollary}
%\newtheorem{definition}[example]{Definition}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: "tit"
%%% End: 

% Stack a bullet on top of the argument.
\newcommand{\bigdot}[1]{\stackrel{\bullet}{#1}} 
% Color switches. NOTE(review): the color "hblue" is not defined in this part
% of the file -- presumably defined elsewhere; verify.
\newcommand{\textc}{\color{hblue}}
\newcommand{\fgc}{\color{red}}
\newcommand{\othc}{\color{green}}
\newcommand{\bgc}{\color{black}}
% Bold plus the \fgc color; a declaration, so it affects the rest of the
% current group.
\newcommand{\titc}{\bf\fgc}

%\renewcommand{{\tem}}{{\item[{\fgc $\bullet$}]}}
%\newcommand{\Ref}[1]{{\hfill\othc [#1]}}

% Fixed vertical skips of 2cm and 1cm (\vspace*, so kept at page breaks).
\newcommand{\s}{\vspace*{2cm}}
\newcommand{\hs}{\vspace*{1cm}}
% Make \bl a fixed 0.8cm horizontal skip, whether or not \bl already exists:
% redefine it when it is defined, create it otherwise.
\ifdefined\bl
\renewcommand{\bl}{\hspace*{0.8cm}}
\else
\newcommand{\bl}{\hspace*{0.8cm}}
\fi
% Replaces LaTeX's \ss (sharp s) with a 4cm vertical skip.
\renewcommand{\ss}{\vspace*{4cm}}
% Line breaks followed by 2mm, 4mm and 8mm of extra vertical space.
\newcommand{\fhs}{\\[2mm]}
\newcommand{\fs}{\\[4mm]}
\newcommand{\fss}{\\[8mm]}
% A right brace sized by an otherwise empty one-column array; #1 is the
% extra row height passed to \\[...].
\newcommand{\bra}[1]
{\left. \begin{array}{c} \\[#1] \end{array} \right\} }

%\newcommand{\Lbf}{\LARGE\bf}
% Large bold font switch (declaration).
\newcommand{\lbf}{\large\bf}


%\newcommand{\grad}{\nabla}
% Letter d followed by a thick math space (\;).
\newcommand{\diff}{d\;}
%\newcommand{\qed}{$\;\;\;\Box$}
% Reference number in parentheses.
\newcommand{\equref}[1]{(\ref{#1})}
% Fraction set in inline math inside an \mbox.
\newcommand{\sfrac}[2]{\mbox{$\frac{#1}{#2}$}}
%\newcommand{\half}{\sfrac{1}{2}}
%\newcommand{\R}{{\bf R}}
% Equals sign with an upright "def" stacked on top.
\newcommand{\eqdef}{\stackrel{\rm def}{=}}
%\newcommand{\argmin}{\mbox{argmin}}
%\renewcommand{\min}{\mbox{min}}
%\renewcommand{\max}{\mbox{max}}


% Calligraphic W.
\newcommand{\Wc}{{\mathcal{W}}}


%\newcommand{\vect}[1]{{\bf #1}}
% Bold math symbol: the argument is set in inline math under \boldmath
% inside an \mbox. Most macros below are built on \vect.
\newcommand{\vect}[1]{{\mbox{\boldmath $#1$}}}
% Wide tilde over the argument.
\newcommand{\squiggle}[1]{{\widetilde{#1}}}
%\newcommand{\bm}[1]{{\mbox{$\boldmath#1 $}}}

\newcommand{\dat}{{\cal{X}}}
% Second redefinition of \d in this file; this one (\Delta) replaces the
% earlier bold-d version.
\renewcommand{\d}{\Delta}

%\newcommand{\vb}{V_B}
\newcommand{\uo}{U_1}
\newcommand{\utp}{U_{t+1}}

%\newcommand{\zero}{\vect{0}}
%\renewcommand{\one}{\vect{1}}


% \vect-based bold letters. \x and \y replace the earlier \def versions;
% \u replaces an existing \u command.
\newcommand{\f}{\vect{f}}
\renewcommand{\x}{\vect{x}}
\renewcommand{\u}{\vect{u}}
%\newcommand{\bb}{\vect{b}}
\renewcommand{\y}{\vect{y}}
% x with a tilde and with the subscripts 1, t, q, t+1, t-1.
\newcommand{\xs}{\tilde{\x}}
\newcommand{\xo}{\x_1}
\newcommand{\xt}{\x_t}
\newcommand{\xq}{\x_q}
\newcommand{\xtp}{\x_{t+1}}
\newcommand{\xtm}{\x_{t-1}}

%\newcommand{\yt}{y_t}
\newcommand{\yq}{y_q}
%\newcommand{\yh}{\hat{y}}
% NOTE(review): \yh has only a commented-out definition in this part of the
% file, so \yht needs \yh to be defined elsewhere -- verify.
\newcommand{\yht}{\yh_t}

\newcommand{\mus}{\squiggle{\mu}}
% Replaces the earlier bold-m definition of \m; every macro built on \m
% (above and below) now produces a bold mu.
\renewcommand{\m}{\vect{\mu}}
%\newcommand{\mb}{\vect{\mu}_B}
\newcommand{\mt}{\m_t}
%\newcommand{\mr}{\m^*}
%\newcommand{\mo}{\m_1}
\newcommand{\mtp}{\m_{t+1}}
\newcommand{\mtm}{\m_{t-1}}
\newcommand{\mTp}{\m_{T+1}}
%\newcommand{\ms}{\squiggle{\m}}

%\renewcommand{\o}{{\omega}}
%\newcommand{\ot}{\o_t}
%\newcommand{\otp}{\o_{t+1}}
%\newcommand{\os}{\squiggle{\o}}
%\renewcommand{\O}{\bf{\Omega}}
%\renewcommand{\O}{\bf{g(\TH)}}
%\newcommand{\M}{\bf{M}}


% Replaces the existing \th command (the LaTeX thorn letter) with a bold
% theta; the macros below add subscripts, a tilde and a star.
\renewcommand{\th}{\vect{\theta}}
\newcommand{\tht}{\th_t}
\newcommand{\tho}{\th_1}
\newcommand{\thb}{\th_B}
\newcommand{\thtp}{\th_{t+1}}
\newcommand{\thTp}{\th_{T+1}}
\newcommand{\ths}{\squiggle{\th}}
% Wide tilde over a non-bold theta.
\newcommand{\thetas}{\squiggle{\theta}}
\newcommand{\thr}{\th^*}

% More \vect-based bold symbols; \w replaces the earlier \def version.
% \uu has the same body as \u.
\newcommand{\ta}{\vect{\tau}}
\renewcommand{\w}{\vect{w}}
\newcommand{\uu}{\vect{u}}
\newcommand{\dv}{\vect{d}}

\newcommand{\ws}{\squiggle{\w}}
\newcommand{\Ws}{\widetilde{\W}}
\newcommand{\wo}{\w_1}
\newcommand{\wt}{\w_t}
%\newcommand{\wb}{\w_B}

\newcommand{\wtp}{\w_{t+1}}
% NOTE(review): \vv is not defined in this part of the file -- verify.
\newcommand{\vtp}{\vv_{t+1}}
\newcommand{\ttheta}{\mbox {{\boldmath $\theta$}}}
\newcommand{\zz}{\vect{z}}

% Replaces the existing \TH command (the LaTeX capital thorn) with a bold
% capital Theta.
\renewcommand{\TH}{\vect{\Theta}}

%\newcommand{\at}{{\alpha_t}}
% alpha family.
\newcommand{\At}{\vect{\alpha}_t}
\newcommand{\aq}{\alpha_q}
% \atq builds on \at, which is redefined (via \renewcommand) further down.
\newcommand{\atq}{\at^q}
% NOTE(review): \a is not defined in this part of the file; in the LaTeX
% kernel \a is the accent helper used inside tabbing, so \a_{t-1} only works
% if \a is redefined elsewhere -- verify.
\newcommand{\atmq}{\a_{t-1}^q}
\newcommand{\btq}{\beta_t^q}

% eta family: \e is \eta; the macros below add the subscripts B, t, T, 1, 2,
% t+1, T+1, t+2, T+2, t-1, the inverses (suffix "i") and a tilde version.
\newcommand{\e}{\eta}
%\newcommand{\eb}{\eta_B}
\newcommand{\ebi}{\e_B^{-1}}
\newcommand{\et}{\e_t}
\newcommand{\eT}{\e_T}
\newcommand{\eo}{\e_1}
\newcommand{\eoo}{\e_2}
\newcommand{\etp}{\e_{t+1}}
\newcommand{\eTp}{\e_{T+1}}
\newcommand{\etpp}{\e_{t+2}}
\newcommand{\eTpp}{\e_{T+2}}
\newcommand{\etm}{\e_{t-1}}
\newcommand{\eti}{\et^{-1}}
\newcommand{\eoi}{\eo^{-1}}
\newcommand{\eooi}{\eoo^{-1}}
\newcommand{\etpi}{\etp^{-1}}
\newcommand{\eTpi}{\eTp^{-1}}
\newcommand{\etppi}{\etpp^{-1}}
\newcommand{\eTppi}{\eTpp^{-1}}
\newcommand{\etmi}{\etm^{-1}}
\newcommand{\es}{\squiggle{\e}}

% Replaces the earlier calligraphic definition of \I with a bold I.
\renewcommand{\I}{\vect{I}}


% E family: subscripted and inverse forms built on \E.
% NOTE(review): \E is not defined in this part of the file -- verify.
\newcommand{\Ei}{\E^{-1}}
%\newcommand{\Eb}{\E_B}
\newcommand{\Et}{\E_t}
\newcommand{\ET}{\E_T}
\newcommand{\Eo}{\E_1}
\newcommand{\Etp}{\E_{t+1}}
\newcommand{\Eti}{\Et^{-1}}
\newcommand{\Eoi}{\Eo^{-1}}
\newcommand{\Etpi}{\Etp^{-1}}
\newcommand{\Ebi}{\E_B^{-1}}
\newcommand{\Es}{\squiggle{\E}}

% \renewcommand requires \at to exist already; its only definition in this
% part of the file is commented out, so it is presumably defined earlier.
% NOTE(review): both macros rely on \a -- see whether \a is redefined
% elsewhere, since the kernel \a is the tabbing accent helper.
\renewcommand{\at}{\a_t}
\newcommand{\atp}{\a_{t+1}}

% Hatted a and its t-subscripted form.
\newcommand{\pred}{{\hat{a}}}
\newcommand{\predt}{{\pred_t}}

% Alias for \Phi.
\newcommand{\Q}{\Phi}
%\newcommand{\q}{\phi}

% Bold z and r through \vect; both replace existing commands.
\renewcommand{\z}{\vect{z}}
\newcommand{\zs}{\tilde{\z}}
\renewcommand{\r}{\vect{r}}

% L with the subscripts t, 1..t, 1..T, 1..t-1 and q. \lq replaces LaTeX's
% left-quote command.
\newcommand{\lt}{{L_t}}
\newcommand{\lot}{{L_{1..t}}}
\newcommand{\loT}{{L_{1..T}}}
\newcommand{\lotm}{{L_{1..t-1}}}
\renewcommand{\lq}{{L_q}}

% Number footnotes with symbols instead of digits.
\renewcommand{\thefootnote}{\fnsymbol{footnote}}


\title{RDM-DC: Poisoning Resilient Dataset Condensation \\ with Robust Distribution Matching (Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:th.zheng@mail.utoronto.ca?Subject=Your UAI 2023 paper}{Tianhang Zheng}{}}
\author[1]{\href{mailto:bli@ece.toronto.edu?Subject=Your UAI 2023 paper}{Baochun Li}{}}
% Add affiliations after the authors
\affil[1]{%
	Department of Electrical and Computer Engineering\\
	University of Toronto
}
 
 
\begin{document}
\maketitle

\section{Omitted Proof}
\begin{lemma}\label{prop:main}
	Assume that $\gD$ and $\gB$ have bounded covariance matrices $\bm{\Sigma}_{\gD}, \bm{\Sigma}_{\gB}\leq \sigma^2\bm{I}$, and that their means have an apparent difference, \emph{i.e.}, $\|\bm{\mu}_{\gD} - \bm{\mu}_{\gB}\|_2^2 \geq \frac{\alpha\sigma^2}{\eps}$ where $\alpha > \frac{2665}{576}$. If we drop all the representations that satisfy $|\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra| \geq t$ for a certain $t$, then we can reduce the scale of the poisoned deviation from $O(\eps\sqrt{d_{\bm{r}}})$ to $\Theta(\eps^2\sqrt{d_{\bm{r}}})$.
\end{lemma}
To prove the above lemma, we need the help of Chebyshev's inequality, which is introduced in the following.
\begin{lemma}[Chebyshev's inequality]
	Given a scalar random variable $X$, if $\mathbb{E}[X]=\mu$ and $\textrm{Var}[X] = \sigma^2$, then
	\begin{align}
		\mathbb{P}(|X-\mu|\geq t)\leq \frac{\sigma^2}{t^2}
	\end{align}
\end{lemma}
Given Chebyshev's inequality, we have the following corollary, which will be used in the proof of Lemma~\ref{prop:main}.
\begin{corollary}\label{cor:che}
	Given a multi-dimensional random variable $\bm{X}$, if $\mathbb{E}[\bm{X}]=\bm{\mu}$ and $\textrm{Cov}[\bm{X}]\leq \sigma^2\bm{I}$, then for any unit vector $\bm{u}$, we have
	\begin{align}
		\mathbb{P}(|\la\bm{X}-\bm{\mu}, \bm{u}\ra| > t) \leq \frac{\sigma^2}{t^2}
	\end{align}
\end{corollary}

\begin{proof}[Proof of Corollary~\ref{cor:che}]
	
	Considering $\la\bm{X}, \bm{u}\ra$ as a scalar random variable, we have $\mathbb{E}[\la\bm{X}, \bm{u}\ra]=\la\bm{\mu}, \bm{u}\ra$ and,
	\begin{align}
		\textrm{Var}[\la\bm{X}, \bm{u}\ra] = \bm{u}^T \textrm{Cov}[\bm{X}] \bm{u} \leq \sigma^2.
	\end{align}
	
	With Chebyshev's inequality, we know that
	\begin{align}
		\mathbb{P}(|\la\bm{X}, \bm{u}\ra-\la\bm{\mu}, \bm{u}\ra|\geq t)\leq \frac{\textrm{Var}[\la\bm{X}, \bm{u}\ra] }{t^2} \leq \frac{\sigma^2}{t^2}
	\end{align}
\end{proof}

Beyond Corollary~\ref{cor:che}, we also need to use the following lemma and corollary in the proof of Lemma~\ref{prop:main}.
\begin{lemma}\label{lemma:separability}
	Given two distributions $\bm{P}$ and $\bm{Q}$ with mean $\bm{\mu}_{\bm{P}}$ and $\bm{\mu}_{\bm{Q}}$ and covariance matrices $\bm{\Sigma}_{\bm{P}}, \bm{\Sigma}_{\bm{Q}}\leq \sigma^2\bm{I}$, if $\|\bm{\mu}_{\bm{P}} - \bm{\mu}_{\bm{Q}}\|_2^2 \geq \frac{\alpha\sigma^2}{\eps}$, then $\la \bm{v}, \bm{\mu}_{\bm{P}}-\bm{\mu}_{\bm{Q}}\ra^2 \geq \frac{\alpha\sigma^2 - \sigma^2/(1-\eps)}{\eps}$ where $\bm{v}$ is the first eigenvector of the covariance matrix of $(1-\eps)\bm{P}+\eps\bm{Q}$.
\end{lemma}
\begin{proof}[Proof of Lemma~\ref{lemma:separability}] 
	The mean of the mixture $(1-\eps)\bm{P}+\eps\bm{Q}$ is $(1-\eps)\bm{\mu}_{\bm{P}} + \eps\bm{\mu}_{\bm{Q}}$, which is denoted by $\bm{\mu}_{\bm{M}}$. We denote $\bm{\mu}_{\bm{P}}-\bm{\mu}_{\bm{Q}}$ by $\bm{\delta}$.
	The covariance matrix of $(1-\eps)\bm{P}+\eps\bm{Q}$ can be expressed as
	\begin{align}
		\mathbb{E}_{\bm{X}\sim(1-\eps)\bm{P}+\eps\bm{Q}}&[(\bm{X} - \bm{\mu}_{\bm{M}})(\bm{X} - \bm{\mu}_{\bm{M}})^T] \nonumber\\
		=&~(1-\eps)\mathbb{E}_{\bm{X}\sim\bm{P}}[(\bm{X} - \bm{\mu}_{\bm{M}})(\bm{X} - \bm{\mu}_{\bm{M}})^T] \nonumber\\
		&~+ \eps\mathbb{E}_{\bm{X}\sim\bm{Q}}[(\bm{X} - \bm{\mu}_{\bm{M}})(\bm{X} - \bm{\mu}_{\bm{M}})^T]
	\end{align}
	Since we have
	\begin{align}
		\mathbb{E}_{\bm{X}\sim\bm{P}}&[(\bm{X} - \bm{\mu}_{\bm{M}})(\bm{X} - \bm{\mu}_{\bm{M}})^T]~\nonumber\\
		&= \mathbb{E}_{\bm{X}\sim\bm{P}}[(\bm{X} - \bm{\mu}_{\bm{P}} + \eps\bm{\delta})(\bm{X} - \bm{\mu}_{\bm{P}} + \eps\bm{\delta})^T]~\nonumber\\
		&= \bm{\Sigma}_{\bm{P}} + \eps^2\bm{\delta}\bm{\delta}^T~\nonumber\\
		\mathbb{E}_{\bm{X}\sim\bm{Q}}&[(\bm{X} - \bm{\mu}_{\bm{M}})(\bm{X} - \bm{\mu}_{\bm{M}})^T]~\nonumber\\
		&= \mathbb{E}_{\bm{X}\sim\bm{Q}}[(\bm{X} - \bm{\mu}_{\bm{Q}} - (1- \eps)\bm{\delta})(\bm{X} - \bm{\mu}_{\bm{Q}} - (1- \eps)\bm{\delta})^T]~\nonumber\\
		&= \bm{\Sigma}_{\bm{Q}}+ (1-\eps)^2\bm{\delta}\bm{\delta}^T~\nonumber,
	\end{align}
	we have a lower bound on the covariance matrix of the mixture $(1-\eps)\bm{P}+\eps\bm{Q}$ as
	\begin{align}
		\bm{\Sigma}_{\bm{M}}=~&\mathbb{E}_{\bm{X}\sim(1-\eps)\bm{P}+\eps\bm{Q}}[(\bm{X} - \bm{\mu}_{\bm{M}})(\bm{X} - \bm{\mu}_{\bm{M}})^T] \nonumber\\
		=&~(1-\eps)\bm{\Sigma}_{\bm{P}} + \eps\bm{\Sigma}_{\bm{Q}} + \eps(1-\eps)\bm{\delta}\bm{\delta}^T
		\geq\eps(1-\eps)\bm{\delta}\bm{\delta}^T.
	\end{align}
	Suppose that $\bm{v}$ is the first eigenvector of $\bm{\Sigma}_{\bm{M}}$ and $\bm{u}=\frac{\bm{\delta}}{\|\bm{\delta}\|_2}$, we then have
	\begin{align}
		\bm{v}^T\bm{\Sigma}_{\bm{M}}\bm{v} \geq \bm{u}^T\bm{\Sigma}_{\bm{M}}\bm{u} \geq \eps(1-\eps)\bm{u}^T\bm{\delta}\bm{\delta}^T\bm{u} = \eps(1-\eps) \|\bm{\delta}\|_2^2.
	\end{align}
	Since $\bm{\Sigma}_{\bm{P}}, \bm{\Sigma}_{\bm{Q}}\leq \sigma^2\bm{I}$, we also have
	\begin{align}
		\bm{v}^T\bm{\Sigma}_{\bm{M}}\bm{v}&= (1-\eps)\bm{v}^T\bm{\Sigma}_{\bm{P}}\bm{v}+ \eps\bm{v}^T\bm{\Sigma}_{\bm{Q}}\bm{v} + \eps(1-\eps)\bm{v}^T\bm{\delta}\bm{\delta}^T\bm{v}\nonumber\\
		&\leq \sigma^2 + \eps(1-\eps)\la \bm{v}, \bm{\delta}\ra^2
	\end{align}
	Thus, we have
	\begin{align}
		\la \bm{v}, \bm{\delta}\ra^2 \geq \frac{\bm{v}^T\bm{\Sigma}_{\bm{M}}\bm{v} - \sigma^2}{\eps(1-\eps)}\geq  \|\bm{\delta}\|_2^2- \frac{\sigma^2}{\eps(1-\eps)}
	\end{align}
	Given the assumption that $\|\bm{\delta}\|_2^2 \geq \frac{\alpha\sigma^2}{\eps}$,
	\begin{align}
		\la \bm{v}, \bm{\delta}\ra^2 \geq  \frac{\alpha\sigma^2 - \sigma^2/(1-\eps)}{\eps}
	\end{align}
\end{proof}

Based on Lemma~\ref{lemma:separability}, we have the following corollary.
\begin{corollary}\label{cor:sep}
	Given the definitions and conditions in Lemma~\ref{lemma:separability}, if $\eps \leq \frac{1}{10}$ and $\alpha > \frac{2665}{576}$, then we have $(1-2\eps)|\la\bm{\delta}, \bm{v}\ra| > \frac{3\sigma}{2\sqrt{\eps}}$.
\end{corollary}

\begin{proof}
	Given Lemma~\ref{lemma:separability}, we have
	\begin{align}
		(1-2\eps)|\la\bm{\delta}, \bm{v}\ra| \geq (1-2\eps)\sqrt{\alpha - \frac{1}{1-\eps}}\frac{\sigma}{\sqrt{\eps}}
	\end{align}
	Since $1-2\eps$ and $- \frac{1}{1-\eps}$ are decreasing functions w.r.t. $\eps$, they achieve the minimum at $\eps=\frac{1}{10}$. Thus, we have
	\begin{align}
		(1-2\eps)|\la\bm{\delta}, \bm{v}\ra| \geq \frac{4}{5}\sqrt{\alpha - \frac{10}{9}}\frac{\sigma}{\sqrt{\eps}}.
	\end{align}
	So if $\alpha >  \frac{2665}{576}$, we have $(1-2\eps)|\la\bm{\delta}, \bm{v}\ra| > \frac{3\sigma}{2\sqrt{\eps}}$.
\end{proof}

\begin{proof}[Proof of Lemma~\ref{prop:main}]
	The mean of the poisoned representation distribution $\gP$ is $\bm{\mu}_{\gP} = (1-\eps)\bm{\mu}_{\gD} + \eps \bm{\mu}_{\gB}$. We denote the covariance matrix of $\gP$ by $\bm{\Sigma}_{\gP}$ and its first eigenvector by $\bm{v}$. Let $\bm{\delta} = \bm{\mu}_{\gB} -  \bm{\mu}_{\gD}$ and $t = |\eps\langle \bm{\delta} , \bm{v}\rangle| + \frac{\sigma}{\sqrt{\eps}}$.
	
	
	%	Assuming that there exists a $\bm{v}$ such that $|\eps\langle \bm{\delta} , \bm{v}\rangle| > \frac{\alpha\sigma}{\sqrt{\eps}}$
	For the original representation distribution, we have
	\begin{align}\label{eq:original_bound}
		\mathbb{P}_{\bm{r}\sim \gD}&[|\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra| > t]  \nonumber\\ 
		&=\mathbb{P}_{\bm{r}\sim \gD}[|\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra - \eps\la\bm{\delta}, \bm{v}\ra|> t]~~\circled{1} ~\nonumber\\
		&\leq \mathbb{P}_{\bm{r}\sim \gD}[ |\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra|> \frac{\sigma}{\sqrt{\eps}}]~~\circled{2} ~\nonumber\\
		& \leq \eps ~~\circled{3} 
	\end{align}
	$\circled{1}$ is because $\bm{\mu}_{\gP}= \bm{\mu}_{\gD} + \eps\bm{\delta}$. $\circled{2}$ is because if  $|\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra - \eps\la\bm{\delta}, \bm{v}\ra|> t$, then either $\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra > t + \eps\la\bm{\delta}, \bm{v}\ra \geq \frac{\sigma}{\sqrt{\eps}}$ or $\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra < - t + \eps\la\bm{\delta}, \bm{v}\ra \leq -\frac{\sigma}{\sqrt{\eps}}$ holds true, where the second inequality in each chain uses $t = |\eps\la\bm{\delta}, \bm{v}\ra| + \frac{\sigma}{\sqrt{\eps}}$. Thus, we have $|\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra|> \frac{\sigma}{\sqrt{\eps}}$, and $\{\bm{r}, |\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra - \eps\la\bm{\delta}, \bm{v}\ra|> t\} \subseteq \{\bm{r}, |\la\bm{r} - \bm{\mu}_{\gD}, \bm{v}\ra|> \frac{\sigma}{\sqrt{\eps}}\}$. Therefore, $\circled{2}$ holds true. $\circled{3}$ is because of Corollary~\ref{cor:che}.
	
	For the poisoned distribution, we have
	\begin{align}
		\mathbb{P}_{\bm{r}\sim \gB}&[|\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra| < t]  \nonumber\\ 
		&=\mathbb{P}_{\bm{r}\sim \gB}[|\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra + (1-\eps)\la\bm{\delta}, \bm{v}\ra|< t]~~\circled{1} ~\nonumber\\
		&\leq \mathbb{P}_{\bm{r}\sim \gB}[ |\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra| > (1-2\eps)|\la\bm{\delta}, \bm{v}\ra|  - \frac{\sigma}{\sqrt{\eps}}]~~\circled{2} ~\nonumber\\
		& \leq \mathbb{P}_{\bm{r}\sim \gB}[ |\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra| >  \frac{\sigma}{2\sqrt{\eps}}] \leq 4\eps~~\circled{3} 
	\end{align}
	$\circled{1}$ is because $\bm{\mu}_{\gP} = \bm{\mu}_{\gB} - (1-\eps)\bm{\delta} $. In the following, we prove 
	$\circled{2}$: Given $|\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra + (1-\eps)\la\bm{\delta}, \bm{v}\ra|< t$, we have $-t - (1-\eps)\la\bm{\delta}, \bm{v}\ra<\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra < t - (1-\eps)\la\bm{\delta}, \bm{v}\ra$. Since $t = |\eps\langle \bm{\delta} , \bm{v}\rangle| + \frac{\sigma}{\sqrt{\eps}}$, $- |\eps\langle \bm{\delta} , \bm{v}\rangle| - \frac{\sigma}{\sqrt{\eps}} - (1-\eps)\la\bm{\delta}, \bm{v}\ra<\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra <  |\eps\langle \bm{\delta} , \bm{v}\rangle| + \frac{\sigma}{\sqrt{\eps}} - (1-\eps)\la\bm{\delta}, \bm{v}\ra$. 
	
	Then, we consider two cases: If $\la\bm{\delta}, \bm{v}\ra \geq 0$, we have $\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra < \frac{\sigma}{\sqrt{\eps}} - (1-2\eps)|\la\bm{\delta}, \bm{v}\ra|$.
	Given Corollary~\ref{cor:sep}, we have $|\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra| > (1-2\eps)|\la\bm{\delta}, \bm{v}\ra|  - \frac{\sigma}{\sqrt{\eps}}$. If $\la\bm{\delta}, \bm{v}\ra < 0$, we have $\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra  > (1-2\eps)|\la\bm{\delta}, \bm{v}\ra|  - \frac{\sigma}{\sqrt{\eps}}$. 
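	Given Corollary~\ref{cor:sep}, we also have $|\la\bm{r} - \bm{\mu}_{\gB}, \bm{v}\ra| > (1-2\eps)|\la\bm{\delta}, \bm{v}\ra|  - \frac{\sigma}{\sqrt{\eps}}$. Therefore,  $\circled{2}$ holds true. $\circled{3}$ is because Corollary~\ref{cor:sep} gives $(1-2\eps)|\la\bm{\delta}, \bm{v}\ra|  - \frac{\sigma}{\sqrt{\eps}} > \frac{\sigma}{2\sqrt{\eps}}$, and Corollary~\ref{cor:che} with threshold $\frac{\sigma}{2\sqrt{\eps}}$ then bounds the probability by $4\eps$.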
	
	After filtering out the data that satisfy $|\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra| \geq t$, the remaining deviation caused by $\gB$ is bounded as
	\begin{align}
		|\eps&\mathbb{E}_{\bm{r}\sim \gB, |\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra|< t}[\bm{r}]| < \eps t\mathbb{P}_{\bm{r}\sim \gB}[|\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra| < t] \nonumber\\
		&\leq 4\eps^2 t = 4\eps^2(|\eps\langle \bm{\delta} , \bm{v}\rangle| + \frac{\sigma}{\sqrt{\eps}}) 
	\end{align}
	Since $\frac{\sigma}{\sqrt{\eps}} < \frac{2}{3}(1-2\eps)|\langle \bm{\delta} , \bm{v}\rangle| \leq \frac{2}{3}|\langle \bm{\delta} , \bm{v}\rangle|$ according to Corollary~\ref{cor:sep}, we have
	\begin{align}
		|\eps&\mathbb{E}_{\bm{r}\sim \gB, |\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra|< t}[\bm{r}]| \leq 4\eps^2\left(\eps + \frac{2}{3}\right)|\langle \bm{\delta} , \bm{v}\rangle| \leq 4\eps^2\left(\eps + \frac{2}{3}\right)\|\bm{\delta}\|_2
	\end{align}
	Since $\eps\leq\frac{1}{10}$ and $\|\bm{\delta}\|_2\sim \Theta(\sqrt{d_{\bm{r}}})$, we have
	\begin{align}
		|\eps&\mathbb{E}_{\bm{r}\sim \gB, |\la\bm{r} - \bm{\mu}_{\gP}, \bm{v}\ra|< t}[\bm{r}]| \sim \Theta(\eps^2\sqrt{d_{\bm{r}}}).
	\end{align}
\end{proof}

\end{document}
