\section{Hessian for \texorpdfstring{$X$}{X}}\label{sec:hessian_X}
In Section~\ref{sub:hessian_x:hessian}, we compute the Hessian matrix with respect to $x$. In Section~\ref{sub:hessian_x:help_lem}, we present a helpful lemma to simplify the Hessian. In Section~\ref{sub:hessian_x:B}, we define $B(x)$, representing the Hessian.

\subsection{Hessian}\label{sub:hessian_x:hessian}

Now, we start to compute the Hessian matrix with respect to $x$.


\begin{lemma}\label{lem:hessian_l}
If the following conditions hold
\begin{itemize}
    \item Let $\gamma_{j_0}(x) := \langle f(x)_{j_0}, v \rangle$ 
    (We define this notation for ease of writing proofs.)
\end{itemize}
Then we have for each $i \in [d^2]$, $l \in [d^2]$
\begin{itemize}
    \item Part 1. $i = l$ Hessian diagonal term
    \begin{align*}
        \frac{\d^2 L_{j_0,i_0} }{\d x_i \d x_i} = & ~  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \gamma_{j_0}(x) \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )^2 \\
        & ~ + c(x,:)_{j_0,i_0} \cdot \\
        & ~ ( \\
        & ~ + \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,i} , v \rangle (1-\gamma_{j_0}(x)) \\
        & ~ - 2 \langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
        & ~ + 2 \langle f(x)_{j_0}, \A_{j_0,i} \rangle^2 \cdot \gamma_{j_0}(x) \\
        & ~ ) 
    \end{align*}
    \item Part 2. $i \neq l$ Hessian off-diagonal term
    \begin{align*}
        \frac{ \d^2 L_{j_0,i_0} }{ \d x_i \d x_l } = & ~  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \gamma_{j_0}(x) \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )  \\
        & ~ \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,l}, v \rangle - \gamma_{j_0}(x) \cdot \langle f(x)_{j_0}, \A_{j_0,l} \rangle )  \\
        & ~ + c(x,:)_{j_0,i_0} \cdot \\
        & ~ ( \\
        & ~ + \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,l} , v \rangle (1-\gamma_{j_0}(x)) \\
        & ~ - \langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,l} \rangle  - \langle f(x)_{j_0} \circ \A_{j_0,l}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
        & ~ + 2\langle f(x)_{j_0}, \A_{j_0,i} \rangle \langle f(x)_{j_0}, \A_{j_0,l} \rangle \cdot \gamma_{j_0}(x) \\
& ~ ) 
    \end{align*}
\end{itemize}
\end{lemma}
\begin{proof}

{\bf Proof of Part 1.}


At first, we have
\begin{align*}
& ~ \frac{\d}{\d x_i}  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) \\
= & ~ \underbrace{ \frac{\d}{\d x_i}  \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle }_{\mathrm{Part~7~of~Lemma~\ref{lem:gradient_x}}}  \\
& ~ - \underbrace{ ( \frac{\d }{\d x_i} \langle f(x)_{j_0}, v \rangle ) }_{ {\mathrm{Part~4~of~Lemma~\ref{lem:gradient_x}}} } \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
& ~ - \underbrace{ ( \frac{\d }{\d x_i}  \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) }_{ {\mathrm{Part~9~of~Lemma~\ref{lem:gradient_x}}} } \cdot \langle f(x)_{j_0}, v \rangle \\
= & ~ \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,i}, v \rangle - \langle f(x)_{j_0} \circ \A_{j_0,i} , v \rangle \cdot \langle f(x)_{j_0} , \A_{j_0, i} \rangle \\
& ~ -  ( \langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle f(x)_{j_0}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0, i} \rangle ) \cdot \langle f(x)_{j_0} , \A_{j_0,i} \rangle \\
& ~ - ( \langle f(x)_{j_0} \circ \A_{j_0,i} , \A_{j_0,i} \rangle - \langle f(x)_{j_0}, \A_{j_0,i} \rangle \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) \cdot \langle f(x)_{j_0} , v \rangle \\
= & ~ \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,i} , v \rangle \\
& ~ - 2 \langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
& ~ + 2 \langle f(x)_{j_0}, \A_{j_0,i} \rangle^2 \cdot \langle f(x)_{j_0}, v \rangle \\
& ~ - \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, v\rangle
\end{align*}
where the first step is based on the product rule of derivative, the second step comes from {\bf Part 4, Part 7, and Part 9} of Lemma~\ref{lem:gradient_x}, and the last step is due to simple algebra.

Then we can show that
\begin{align*}
& ~ \frac{\d }{\d x_i} ( \frac{\d }{\d x_i} L_{j_0,i_0} ) \\
= & ~ \frac{\d }{\d x_i} (  c(x,:)_{j_0, i_0} \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) ) \\
= & ~ ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) ^2 \\
& ~ + c(x,:)_{j_0, i_0} \cdot \frac{\d}{\d x_i}  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ),
\end{align*}
where the first step comes from {\bf Part 6} of Lemma~\ref{lem:gradient_x} and the second step is due to {\bf Part 5} of Lemma~\ref{lem:gradient_x}.

Combining the above two equations, we complete the proof.

{\bf Proof of Part 2.}

Firstly, we can show that
\begin{align*}
& ~ \frac{\d}{\d x_l}  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) \\
= & ~ \underbrace{ \frac{\d}{\d x_l}  \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle }_{\mathrm{Part~8~of~Lemma~\ref{lem:gradient_x}}}  \\
& ~ - \underbrace{ ( \frac{\d }{\d x_l} \langle f(x)_{j_0}, v \rangle ) }_{ {\mathrm{Part~4~of~Lemma~\ref{lem:gradient_x}}} } \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
& ~ - \underbrace{ ( \frac{\d }{\d x_l}  \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) }_{ {\mathrm{Part~10~of~Lemma~\ref{lem:gradient_x}}} } \cdot \langle f(x)_{j_0}, v \rangle \\
= & ~ \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,l}, v \rangle - \langle f(x)_{j_0} \circ \A_{j_0,l} , v \rangle \cdot \langle f(x)_{j_0} , \A_{j_0, i} \rangle \\
& ~ -  ( \langle f(x)_{j_0} \circ \A_{j_0,l}, v \rangle - \langle f(x)_{j_0}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0, l} \rangle ) \cdot \langle f(x)_{j_0} , \A_{j_0,i} \rangle \\
& ~ - ( \langle f(x)_{j_0} \circ \A_{j_0,i} , \A_{j_0,l} \rangle - \langle f(x)_{j_0}, \A_{j_0,i} \rangle \langle f(x)_{j_0}, \A_{j_0,l} \rangle ) \cdot \langle f(x)_{j_0} , v \rangle \\
= & ~ \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,l} , v \rangle \\
& ~ - \langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,l} \rangle - \langle f(x)_{j_0} \circ \A_{j_0,l}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
& ~ + 2\langle f(x)_{j_0}, \A_{j_0,i} \rangle \langle f(x)_{j_0}, \A_{j_0,l} \rangle \cdot \langle f(x)_{j_0}, v \rangle \\
& ~ - \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,l}, v \rangle \cdot \langle f(x)_{j_0}, v\rangle
\end{align*}
where the first step is owing to the product rule of derivative, the second step is based on {\bf Part 4, Part 8, and Part 10} of Lemma~\ref{lem:gradient_x}, and the last step comes from simple algebra.

We have
\begin{align*}
& ~ \frac{\d }{\d x_l} ( \frac{\d }{\d x_i} L_{j_0,i_0} ) \\
= & ~ \frac{\d }{\d x_l} (  c(x,:)_{j_0, i_0} \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle ) ) \\
= & ~ ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )  \\
& ~ \cdot ( \langle  f(x)_{j_0} \circ \A_{j_0,l}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,l} \rangle ) \\
& ~ + c(x,:)_{j_0, i_0} \cdot \frac{\d}{\d x_l}  ( \langle  f(x)_{j_0} \circ \A_{j_0,i}, v \rangle - \langle  f(x)_{j_0} , v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle )
\end{align*}

Combining the above two equations, we complete the proof.
\end{proof}


\subsection{A Helpful Lemma}\label{sub:hessian_x:help_lem}

In this section, we present a helpful lemma.

\begin{lemma}\label{lem:helpful_lemma}
We have
\begin{itemize}
    \item Part 1.
    \begin{align*}
        \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,l} , v \rangle = \underbrace{ \A_{j_0,i}^\top }_{1 \times n} \underbrace{ \diag ( f(x)_{j_0} \circ v ) }_{n \times n} \underbrace{ \A_{j_0,l} }_{n \times 1}
    \end{align*}
    \item Part 2.
    \begin{align*}
        & ~ \langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,l} \rangle  + \langle f(x)_{j_0} \circ \A_{j_0,l}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\
        = & ~ \A_{j_0,i}^\top ( \underbrace{ ( f(x)_{j_0} \circ v ) ( f(x)_{j_0} )^\top + f(x)_{j_0} (f(x)_{j_0} \circ v )^\top }_{\rank-2} ) \A_{j_0,l}
    \end{align*}
    \item Part 3.
    \begin{align*}
        \langle f(x)_{j_0} \circ \A_{j_0,i} ,  v \rangle \cdot \langle f(x)_{j_0} \circ \A_{j_0,l} ,  v \rangle = \A_{j_0,i}^\top \underbrace{ ( f(x)_{j_0} \circ v ) ( f(x)_{j_0} \circ v )^\top }_{\rank-1} \A_{j_0,l}
    \end{align*}
    \item Part 4.
    \begin{align*}
        \langle f(x)_{j_0} , \A_{j_0,i} \rangle \cdot \langle f(x)_{j_0} , \A_{j_0,l}  \rangle = \A_{j_0,i}^\top \underbrace{ ( f(x)_{j_0}  ) ( f(x)_{j_0}  )^\top }_{\rank-1} \A_{j_0,l}
    \end{align*}
\end{itemize}
\end{lemma}
\begin{proof}

{\bf Proof of Part 1.}
We have
\begin{align*}
     \langle f(x)_{j_0} \circ \A_{j_0,i} \circ \A_{j_0,l} , v \rangle 
     = & ~  \A_{j_0,i}^\top \diag ( f(x)_{j_0} \circ v ) \A_{j_0,l} 
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}.

{\bf Proof of Part 2.}
We have
\begin{align*}
    & ~\langle f(x)_{j_0} \circ \A_{j_0,i}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,l} \rangle  + \langle f(x)_{j_0} \circ \A_{j_0,l}, v \rangle \cdot \langle f(x)_{j_0}, \A_{j_0,i} \rangle \\ 
    = & ~ \langle f(x)_{j_0} \circ v , \A_{j_0,i} \rangle \cdot f(x)_{j_0}^{\top} \A_{j_0,l} \\
     & ~ + \langle f(x)_{j_0} \circ v, \A_{j_0,l} \rangle \cdot \A_{j_0,i}^{\top} \cdot f(x)_{j_0} \\
    = & ~ \A_{j_0,i}^{\top} \cdot (f(x)_{j_0} \circ v)(f(x)_{j_0})^{\top} \A_{j_0,l} \\
     & ~ +\A_{j_0,i}^{\top} f(x)_{j_0}(f(x)_{j_0} \circ v)^{\top} \A_{j_0,l} \\
    = & ~ \A_{j_0,i}^\top (  ( f(x)_{j_0} \circ v ) ( f(x)_{j_0} )^\top \\
     & ~ + f(x)_{j_0} (f(x)_{j_0} \circ v )^\top  ) \A_{j_0,l}
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}, the second step follows from Fact~\ref{fac:circ_rules}, and the last step follows from simple algebra. 

{\bf Proof of Part 3.}
We have
\begin{align*}
    \langle f(x)_{j_0} \circ \A_{j_0,i} ,  v \rangle \cdot \langle f(x)_{j_0} \circ \A_{j_0,l} ,  v \rangle
    = & ~ \langle f(x)_{j_0} \circ v ,  \A_{j_0,i} \rangle \langle f(x)_{j_0} \circ v ,  \A_{j_0,l} \rangle \\
    = & ~ \A_{j_0,i}^\top  ( f(x)_{j_0} \circ v ) ( f(x)_{j_0} \circ v )^\top  \A_{j_0,l}
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}, and the last step follows from Fact~\ref{fac:circ_rules}.

{\bf Proof of Part 4.}
We have
\begin{align*}
    \langle f(x)_{j_0} , \A_{j_0,i} \rangle \cdot \langle f(x)_{j_0} , \A_{j_0,l}  \rangle
    = & ~ \A_{j_0,i}^{\top} f(x)_{j_0}f(x)_{j_0}^{\top} \A_{j_0,l}
\end{align*}
where the first step follows from Fact~\ref{fac:circ_rules}.
\end{proof}

\subsection{Defining \texorpdfstring{$B(x)$}{B(x)}}\label{sub:hessian_x:B}

In this section, we formally define $B(x)$.

\begin{definition}\label{def:B(x)}
If the following conditions hold
\begin{itemize}
    \item  Let $\gamma_{j_0}(x) = \langle f(x)_{j_0},v \rangle$
\end{itemize}
We define $B(x) \in \R^{n \times n}$ as follows
\begin{align*}
    B(x) := & B_{\diag}^1 \\
     & ~ + B_{\rank}^1 + B_{\rank}^2 + B_{\rank}^3
\end{align*}
where
\begin{itemize}
    \item $B_{\diag}^1 := (1-\gamma_{j_0}(x)) \cdot c(x,:)_{j_0,i_0} \cdot \diag( f(x)_{j_0} \circ v )$
\end{itemize}
and 
\begin{itemize}
    \item $B_{\rank}^1 := -( \gamma_{j_0}(x) + c(x,:)_{j_0,i_0}) \cdot ( ( f(x)_{j_0} \circ v ) f(x)_{j_0}^\top + f(x)_{j_0} (f(x)_{j_0} \circ v)^\top )$
    \item $B_{\rank}^2 := ( 2 \gamma_{j_0}(x) c(x,:)_{j_0,i_0} + \gamma_{j_0}(x)^2 ) \cdot f(x)_{j_0} f(x)_{j_0}^\top $
    \item $B_{\rank}^3 := (f(x)_{j_0} \circ v) \cdot ( f(x)_{j_0} \circ v )^\top$
\end{itemize}
\end{definition}

\begin{lemma}
Let $B(x)$ be defined as Definition~\ref{def:B(x)}, then we have
\begin{align*}
 \frac{\d^2 L_{j_0,i_0}}{\d x \d x} = \underbrace{ \A_{j_0}^\top }_{d^2 \times n} \underbrace{ B(x) }_{n \times n} \underbrace{ \A_{j_0} }_{n \times d^2}
\end{align*}
\end{lemma}
\begin{proof}
The proof follows by combining Lemma~\ref{lem:hessian_l} and Lemma~\ref{lem:helpful_lemma}.
\end{proof}