\section{Hessian for \texorpdfstring{$X$}{} and \texorpdfstring{$Y$}{}}\label{sec:hessian_XY}
In Section~\ref{sub:hessian_XY:hessian}, we compute the Hessian matrix with respect to both $X$ and $Y$. In Section~\ref{sub:hessian_XY:help_lem}, we present a helpful lemma for the subsequent proofs. In Section~\ref{sub:hessian_XY:B}, we define $B(x,y)$ for further analysis.

\subsection{Computing Hessian}\label{sub:hessian_XY:hessian}

In this section, we compute the Hessian matrix for $X$ and $Y$.

\begin{lemma}\label{lem:hessian_xy}
If the following conditions hold
\begin{itemize}
    \item Let $f(x)_{j_0}$ be defined as Definition~\ref{def:f}.
    \item Let $c(x,y)_{j_0,i_0}$ be defined as Definition~\ref{def:c}.
    \item Let $h(y)_{i_0}$ be defined as Definition~\ref{def:h}.
    \item Let $L_{j_0,i_0}$ be defined as Definition~\ref{def:L}.
    
\end{itemize}
Then, we have
\begin{itemize}
    \item {\bf Part 1.}
    \begin{align*}
        \frac{\d }{\d y_{i_0,i_1} } ( \frac{\d }{\d x_i} L_{j_0,i_0} )  
        = & ~ \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0} \circ \A_{j_0,i} , h(y)_{i_0} \rangle \\
& ~ - \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \langle f(x)_{j_0}, h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
& ~ + c(x,y)_{j_0, i_0} \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, A_{3,*,i_1} \rangle - \langle  f(x)_{j_0} , A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )
    \end{align*}
\end{itemize}
\end{lemma}
\begin{proof}

We can show
\begin{align*}
 & ~ \frac{\d }{\d y_{i_0,i_1} } ( \frac{\d }{\d x_i} L_{j_0,i_0} ) \\
= & ~ \frac{\d }{\d y_{i_0,i_1}} (  c(x,y)_{j_0, i_0} \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, h(y)_{i_0} \rangle - \langle  f(x)_{j_0} , h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) ) \\
= & ~ \frac{\d }{\d y_{i_0,i_1}} (  c(x,y)_{j_0, i_0} ) \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, h(y)_{i_0} \rangle - \langle  f(x)_{j_0} , h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )  \\
& ~ +(  c(x,y)_{j_0, i_0}) \cdot  \frac{\d }{\d y_{i_0,i_1}}  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, h(y)_{i_0} \rangle - \langle  f(x)_{j_0} , h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )  \\
= & ~ \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, h(y)_{i_0} \rangle - \langle  f(x)_{j_0} , h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )   \\
& ~ + c(x,y)_{j_0, i_0} \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, A_{3,*,i_1} \rangle - \langle  f(x)_{j_0} , A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) \\
= & ~ \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0} \circ \A_{j_0,i} , h(y)_{i_0} \rangle \\
& ~ - \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \langle f(x)_{j_0}, h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
& ~ + c(x,y)_{j_0, i_0} \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, A_{3,*,i_1} \rangle - \langle  f(x)_{j_0} , A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )
\end{align*}
where the first step is due to {\bf Part 6} of Lemma~\ref{lem:gradient_x}, the second step comes from the product rule of derivative, the third step is based on Lemma~\ref{lem:hessian_y_j_0_i_0}, and the last step follows from simple algebra.

Thus, we complete the proof.
\end{proof}

\subsection{A Helpful Lemma}\label{sub:hessian_XY:help_lem}

In this section, we provide a helpful lemma.

\begin{lemma}\label{lem:hessian_xy_help_lem}
If the following conditions hold
\begin{itemize}
    \item Let $f(x)_{j_0}$ be defined in Definition~\ref{def:f}.
    \item Let $\A \in \R^{n^2 \times d^2}$ be defined in Definition~\ref{def:u}.
    \item Let $c(x,y)_{j_0,i_0}$ be defined as Definition~\ref{def:c}.
    \item Let $h(y)_{i_0}$ be defined as Definition~\ref{def:h}.
    \item Let $L_{j_0,i_0}$ be defined as Definition~\ref{def:L}.
\end{itemize}
Then, we have
\begin{itemize}
    \item {\bf Part 1.} 
    \begin{align*}
        \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0} \circ \A_{j_0,i} , h(y)_{i_0} \rangle = \A_{j_0,i}^\top (f(x)_{j_0} \circ h(y)_{i_0}) f(x)_{j_0}^\top A_{3,*,i_1} 
    \end{align*}
    \item {\bf Part 2.}
    \begin{align*}
         \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle  = \langle f(x)_{j_0}, h(y)_{i_0} \rangle \cdot \A_{j_0,i}^\top f(x)_{j_0} f(x)_{j_0}^\top A_{3,*,i_1}
    \end{align*}
    \item {\bf Part 3.}
    \begin{align*}
         \langle f(x)_{j_0} \circ \A_{j_0,i}, A_{3,*,i_1} \rangle =  \A_{j_0,i} ^{\top}\diag(f(x)_{j_0}) A_{3,*,i_1}
    \end{align*}
    \item {\bf Part 4.}
    \begin{align*}
         \langle  f(x)_{j_0} , A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle = \A_{j_0,i}^{\top} f(x)_{j_0} f(x)_{j_0}^\top A_{3,*,i_1}
    \end{align*}
\end{itemize}
\end{lemma}
\begin{proof}
{\bf Proof of Part 1.}
\begin{align*}
     \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0} \circ \A_{j_0,i} , h(y)_{i_0} \rangle 
     = & ~ \langle f(x)_{j_0} \circ  h(y)_{i_0} , \A_{j_0,i} \rangle f(x)_{j_0}^{\top}  A_{3,*,i_1} \\ 
     = & ~ \A_{j_0,i}^{\top}(f(x)_{j_0} \circ  h(y)_{i_0})  f(x)_{j_0}^{\top}  A_{3,*,i_1}
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}, and the second step follows from Fact~\ref{fac:circ_rules}.

{\bf Proof of Part 2.}
\begin{align*}
     \langle f(x)_{j_0}, A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, h(y)_{i_0} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle  
     = & ~  \langle f(x)_{j_0}, h(y)_{i_0} \rangle \A_{j_0,i}^{\top} f(x)_{j_0} f(x)_{j_0}^{\top} A_{3,*,i_1}
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}.

{\bf Proof of Part 3.}
\begin{align*}
    \langle f(x)_{j_0} \circ \A_{j_0,i}, A_{3,*,i_1} \rangle = & ~  (f(x)_{j_0} \circ \A_{j_0,i})^{\top} A_{3,*,i_1} \\
    = & ~ (\diag(f(x)_{j_0}) \A_{j_0,i})^{\top} A_{3,*,i_1}\\
    = & ~ \A_{j_0,i}^{\top} \diag(f(x)_{j_0}) A_{3,*,i_1}
\end{align*}
where the first, second, and last steps follow from Fact~\ref{fac:circ_rules}.


{\bf Proof of Part 4.}
\begin{align*}
    \langle  f(x)_{j_0} , A_{3,*,i_1} \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle = \A_{j_0,i}^{\top} f(x)_{j_0} f(x)_{j_0}^\top A_{3,*,i_1}
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}.

\end{proof}

\subsection{Creating \texorpdfstring{$B(x,y)$}{}}\label{sub:hessian_XY:B}

In this section, we give a formal definition of $B(x,y)$.

\begin{definition}\label{def:B(x,y)}
We define $B(x,y)$ as
\begin{align*}
    B(x,y) = B_{\diag}^1(x,y) + B_{\rank}^1(x,y) + B_{\rank}^2(x,y) + B_{\rank}^3(x,y)
\end{align*}
where
\begin{itemize}
    \item $B_{\rank}^1 (x,y) = ( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top$
    \item $B_{\rank}^2(x,y) = - \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top$
    \item $B_{\diag}^1(x,y) = c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} ) $
    \item $B_{\rank}^3(x,y) = - c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top$
\end{itemize}

\end{definition}

\begin{lemma}
If the following conditions hold
\begin{itemize}
    \item Let $B(x,y)$ be defined as Definition~\ref{def:B(x,y)}.
\end{itemize}
Then, we have 
\begin{itemize}
\item {\bf Part 1.} 
\begin{align*}
   \frac{\d^2 L_{j_0,i_0}}{ \d y_{i_0} \d x} = \A_{j_0}^\top B(x,y) A_3 \in \R^{d^2 \times d}
\end{align*}
\item {\bf Part 2.} For $i_1 \neq i_0$,
\begin{align*}
   \frac{\d^2 L_{j_0,i_0}}{ \d y_{i_1} \d x} = \A_{j_0}^\top {\bf 0}_{n \times n} A_3 = {\bf 0}_{d^2 \times d}
\end{align*}
\end{itemize}
\end{lemma}
\begin{proof}
{\bf Proof of Part 1.}
We have
\begin{align*}
    \frac{\d^2 L_{j_0,i_0}}{ \d y_{i_0,i_2} \d x_i} 
    = & ~ \A^{\top}_{j_0,i} B(x,y) A_{3,*,i_2}
\end{align*}
where the equality follows from combining Lemma~\ref{lem:hessian_xy}, Lemma~\ref{lem:hessian_xy_help_lem}, and Definition~\ref{def:B(x,y)}.

Then, we can have
\begin{align*}
    \frac{\d^2 L_{j_0,i_0}}{ \d y_{i_0} \d x} = \A_{j_0}^\top B(x,y) A_3 
\end{align*}

{\bf Proof of Part 2.}
We have
\begin{align*}
    \frac{\d^2 L_{j_0,i_0}}{ \d y_{i_1,i_2} \d x_i} 
    = & ~ \A^{\top}_{j_0,i} {\bf 0}_{n \times n} A_{3,*,i_2} = 0
\end{align*}
where the first step follows from combining Lemma~\ref{lem:hessian_xy} and Lemma~\ref{lem:hessian_xy_help_lem}.

Then, we can have
\begin{align*}
    \frac{\d^2 L_{j_0,i_0}}{ \d y_{i_1} \d x} = \A_{j_0}^\top {\bf 0}_{n \times n} A_3 = {\bf 0}_{d^2 \times d}
\end{align*}
\end{proof}

