\section{Lipschitz for Hessian of \texorpdfstring{$x,y$}{}}\label{sec:lips_H_xy}
In Section~\ref{sub:lips_H_xy:main_res}, we present the main results of the Lipschitz property of $H_{x,y}$. In Section~\ref{sub:lips_H_xy:summary}, we summarize the results from the following Steps 1--4. In Section~\ref{sub:lips_H_xy:upper_bound}, we compute the upper bound of basic functions for the following proof. In Section~\ref{sub:lips_H_xy:basic_lips}, we compute the Lipschitz property of basic functions for the following proof. In Section~\ref{sub:lips_H_xy:step1}, we analyze the first step of Lipschitz function $( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top$. In Section~\ref{sub:lips_H_xy:step2}, we analyze the second step of Lipschitz function $- \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top$. In Section~\ref{sub:lips_H_xy:step3}, we analyze the third step of Lipschitz function $- c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} ) $. In Section~\ref{sub:lips_H_xy:step4}, we analyze the fourth step of Lipschitz function $c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top $. In Section~\ref{sub:lips_H_xy:psd}, we compute the PSD upper bound for the Hessian matrix. In Section~\ref{sub:lips_H_xy:summary_psd}, we summarize the PSD upper bound of $G(x,y)$.



\subsection{Main Results}\label{sub:lips_H_xy:main_res}


In this section, we present the main result of Section~\ref{sec:lips_H_xy}.

\begin{lemma} \label{lem:lips_H_xy}
If the following conditions hold
 \begin{itemize}
    \item $\max_{j_0 \in [n]} \| \A_{j_0}  \| \leq R$
    \item Let $H(x,y)_{j_0,i_0} \in \R^{d^2 \times d}$ denote $\frac{\d^2 L_{j_0,i_0}}{ \d x \d y_{i_0} }$
    \item $\frac{\d^2 L_{j_0,i_0}}{ \d x \d y_{i_1}} = {\bf 0}_{d^2 \times d}$ for all $i_1 \neq i_0$
    \item Let $H(x,y) \in \R^{d^2 \times d^2}$ be 
    \begin{align*}
    H(x,y) : =
    \begin{bmatrix}
        \sum_{j_0=1}^n H(x,y)_{j_0,1} & \sum_{j_0=1}^n H(x,y)_{j_0,2} & \cdots & \sum_{j_0=1}^n H(x,y)_{j_0,d}
    \end{bmatrix}
    \end{align*}
\end{itemize} 

Then we have
\begin{itemize}
    \item Part 1. For $j_0 \in [n], i_0 \in [d]$ 
    \begin{align*}
    \|H(x,y)_{j_0,i_0} - H(\wt{x},\wt{y})_{j_0,i_0} \| \leq n^{1.5} \exp(20R^2) \cdot ( \|x -\wt{x} \|_2 +   \| y -\wt{y} \|_2 )
\end{align*}
    \item Part 2. \begin{align*}
    \|H(x,y) - H(\wt{x},\wt{y}) \| \leq n^{2.5} d \exp(20R^2)(  \|x -\wt{x} \|_2 + \| y -\wt{y} \|_2)
\end{align*}
\end{itemize}
\end{lemma}
\begin{proof}
    {\bf Proof of Part 1.}
    It follows from Lemma~\ref{lem:summary_Gi_xy}.

    {\bf Proof of Part 2}.
    We can show that
    \begin{align*}
        \| H(x,y) - H(\wt{x},\wt{y}) \| \leq nd \cdot n^{1.5} \exp(20R^2) ( \| x - \wt{x} \|_2 + \| y - \wt{y} \|_2 )
    \end{align*}
    where the inequality follows from Part 1, the triangle inequality, and the fact that $H$ is composed of $nd$ terms $H(x,y)_{j_0,i_0}$ for all $j_0 \in [n]$, $i_0 \in [d]$.
\end{proof}

\subsection{Summary of Four Steps on Lipschitz for Matrix Functions}\label{sub:lips_H_xy:summary}

In this section, we summarize the four steps for analyzing the Lipschitz for different matrix functions.

\begin{lemma}\label{lem:summary_Gi_xy}
    If the following conditions hold 
    \begin{itemize}
        \item $G_{1}(x,y) = ( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top
        $
        \item $G_{2}(x,y) = - \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top$
        \item $G_3(x,y) = - c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} ) $
        \item $G_4(x,y) = c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top$
        
    \end{itemize}

    Then, we have
    \begin{align*}
        \sum_{k=1}^4 \|G_{k }(x,y) - G_{k}(\wt{x}, \wt{y})\| \leq n^{1.5} \exp(20R^2) ( \|x -\wt{x} \|_2 +  \| y -\wt{y} \|_2 )
    \end{align*}
\end{lemma}
\begin{proof}
    The proof follows from Lemma~\ref{lem:lipschitz_xy_G1}, Lemma~\ref{lem:lipschitz_xy_G2}, Lemma~\ref{lem:lipschitz_xy_G3}, and Lemma~\ref{lem:lipschitz_xy_G4}.
\end{proof}

\subsection{A Core Tool: Upper Bound for Several Basic Functions}\label{sub:lips_H_xy:upper_bound}

In this section, we give an upper bound for each of the basic functions.

\begin{lemma}
    If the following conditions hold
    \begin{itemize}
     \item Let $f(x)_{j_0} \in \R^n $ be defined as Definition~\ref{def:f}.
    \item Let $h(y)_{i_0} \in \R^n $ be defined as Definition~\ref{def:h}.
    \item Let $c(x,y)_{j_0,i_0} \in \R $ be defined as Definition~\ref{def:c}.
    \item Let $ R \geq 4$
    \item $\|A_3 \| \leq R$
    \item $\|y_{i_0} \|_2 \leq R$ 
    \item $| b_{j_0,i_0} | \leq   R$
    
    \end{itemize}

    Then, we have
    \begin{itemize}
        \item Part 1. $\| h(y)_{i_0}\|_2 \leq R^2$
        \item Part 2. $| c(x,y)_{j_0,i_0}| \leq 2R^2$
    \end{itemize}
\end{lemma}
\begin{proof}
    {\bf Proof of Part 1.}
    \begin{align*}
        \|h(y)_{i_0} \|_2 = & ~ \|A_3 y_{i_0} \|_2 \\
        \leq & ~ \|A_3 \| \|y_{i_0} \|_2 \\
        \leq & ~ R^2 
    \end{align*}
    where the first step is due to Definition~\ref{def:h}, the second step is based on Fact~\ref{fac:matrix_norm} and the third step is because of Lemma~\ref{lem:upper_bound}.
    
    {\bf Proof of Part 2.}
    \begin{align*}
        |c(x,y)_{j_0,i_0}| = & ~ |\langle f(x)_{j_0}, h(y)_{i_0}\rangle  - b_{j_0,i_0}| \\
        \leq  & ~ \| f(x)_{j_0}\|_2 \|h(y)_{i_0} \|_2 + |b_{j_0,i_0} | \\
        \leq & ~ R^2 + R\\
        \leq & ~ 2R^2
    \end{align*}
    where the first step is because of Definition~\ref{def:c}, the second step is based on triangle inequality and Cauchy–Schwarz inequality, the third step is due to Lemma~\ref{lem:upper_bound}, and the last step follows from $R \geq 4$. 
\end{proof}
\subsection{A Core Tool: Lipschitz Property for Several Basic Functions}\label{sub:lips_H_xy:basic_lips}

In this section, we introduce the Lipschitz property for several basic functions.

\begin{lemma}\label{lem:upper_bound:y}
    If the following conditions hold
    \begin{itemize}
         \item Let $f(x)_{j_0} \in \R^n $ be defined as Definition~\ref{def:f}.
    \item Let $h(y)_{i_0} \in \R^n $ be defined as Definition~\ref{def:h}.
    \item Let $c(x,y)_{j_0,i_0} \in \R $ be defined as Definition~\ref{def:c}.
    \item Let $ R \geq 4$
    \item $\|A_3 \| \leq R$
    \item $\|y_{i_0} \|_2 \leq R$ 
    \item $| b_{j_0,i_0} | \leq   R$
    \item Let $R_0$ be defined as Definition~\ref{def:R_0}.
    \end{itemize}

    Then, we have
    \begin{itemize}
        \item Part 1. $\|h(y)_{i_0} -  h(\wt{y})_{i_0}\|_2 \leq R\|y - \wt{y} \|_2$
        \item Part 2.
         $| c(x,y)_{j_0,i_0} - c(\wt{x},y)_{j_0,i_0}| \leq R^2 \cdot R_0 \|x -\wt{x} \|_2$
         \item Part 3.
         $|c(x,y)_{j_0,i_0} -c(x,\wt{y})_{j_0,i_0}| \leq R \| y - \wt{y} \|_2 $
    \end{itemize}
\end{lemma}
\begin{proof}
    {\bf Proof of Part 1.}
    \begin{align*}
        \|h(y)_{i_0} -  h(\wt{y})_{i_0}\|_2 = & ~ \|A_3 y_{i_0} - A_3 \wt{y}_{i_0} \|_2 \\
        \leq & ~\|A_3 \| \|y_{i_0} - \wt{y}_{i_0}\|_2 \\
        \leq & ~ R\|y - \wt{y} \|_2
    \end{align*}
    where the first step follows from Definition~\ref{def:h}, the second step is based on Fact~\ref{fac:matrix_norm}, and the third step is due to Lemma~\ref{lem:upper_bound}.
    
    {\bf Proof of Part 2.}
    \begin{align*}
        | c(x,y)_{j_0,i_0} - c(\wt{x},y)_{j_0,i_0}| = & ~ |\langle f(x)_{j_0}, h(y)_{i_0} \rangle - b_{j_0,i_0} - (\langle f(\wt{x})_{j_0}, h(y)_{i_0} \rangle - b_{j_0,i_0}) |\\
        \leq & ~ \|f(x)_{j_0} - f(\wt{x})_{j_0} \|_2 \| h(y)_{i_0} \|_2\\
        \leq & ~ R^2 \cdot R_0 \|x-\wt{x}\|_2
    \end{align*}
    where the first step is due to Definition~\ref{def:c}, the second step follows from Cauchy–Schwarz inequality, and the third step is because of {\bf Part 1} of the lemma in Section~\ref{sub:lips_H_xy:upper_bound} and {\bf Part 3} of Lemma~\ref{lem:basic_lips}.
    
    {\bf Proof of Part 3.}
    \begin{align*}
        |c(x,y)_{j_0,i_0} -c(x,\wt{y})_{j_0,i_0}| = & ~
        |\langle f(x)_{j_0}, h(y)_{i_0} \rangle - b_{j_0,i_0} - (\langle f(x)_{j_0}, h(\wt{y})_{i_0} \rangle - b_{j_0,i_0}) | \\
        \leq & ~ \|f(x)_{j_0} \|_2 \cdot \|h(y)_{i_0} - h(\wt{y})_{i_0} \|_2 \\
        \leq & ~ R \|y -\wt{y} \|_2
    \end{align*}
    where the first step follows from Definition~\ref{def:c}, the second step is due to Cauchy–Schwarz inequality and the third step is because of {\bf Part 4} of Lemma~\ref{lem:upper_bound} and {\bf Part 1} of this Lemma.
\end{proof}

\subsection{Calculation: Step 1 Lipschitz for Matrix Function \texorpdfstring{$( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top$}{}}\label{sub:lips_H_xy:step1}

In this section, we calculate the Lipschitz for $( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top$.


\begin{lemma}\label{lem:lipschitz_xy_G1}
If the following conditions hold
\begin{itemize}
    \item Let $G_1(x,y) = ( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top$ 
    \item Let $R_0$ be defined in Definition~\ref{def:R_0}.
    \item Let $\alpha(x)_{j_0} \in \R$ be defined as Definition~\ref{def:alpha}
    \item Let $f(x)_{j_0} \in \R^n$ be defined as Definition~\ref{def:f}
    \item Let $c(x,y)_{j_0,i_0} \in \R$ be defined as Definition~\ref{def:c}
    \item Let $\gamma(x)_{j_0} = \langle f(x)_{j_0}, v \rangle \in \R$
    \item $\| A_1 \|, \| A_2 \| , \| A_3 \| \leq R$, $\| \A_{j_0} \| \leq R$, $\| x \|_2 \leq R$,$| b_{j_0,i_0} | \leq R$, $\| v \|_2 \leq R^2$
    \item Let $R \geq 4$
\end{itemize}
Then, we have 
\begin{align*}
    \|G_1(x,y) - G_1( \wt{x}, \wt{y} ) \| \leq 2R^2 \cdot R_0 (\|x -\wt{x} \|_2 +  \| y -\wt{y} \|_2)
\end{align*}
\end{lemma}
\begin{proof}
We define
\begin{align*}
    G_{1,1} = & ~ ( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top - ( f(\wt{x})_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top \\
    G_{1,2} = & ~ ( f(\wt{x})_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top - ( f(\wt{x})_{j_0} \circ h(\wt{y})_{i_0} ) f(x)_{j_0}^\top \\
    G_{1,3} = & ~ ( f(\wt{x})_{j_0} \circ h(\wt{y})_{i_0} ) f(x)_{j_0}^\top - ( f(\wt{x})_{j_0} \circ h(\wt{y})_{i_0} ) f(\wt{x})_{j_0}^\top 
\end{align*}
so that, by telescoping, $G_1(x,y) - G_1(\wt{x},\wt{y}) = G_{1,1} + G_{1,2} + G_{1,3}$.

We have
\begin{align*}
    \|G_{1,1}\| = & ~ \| ( f(x)_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top - ( f(\wt{x})_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top \| \\
    \leq & ~ \|f(x)_{j_0} -f(\wt{x})_{j_0}\|_{\infty} \cdot \| h(y)_{i_0} \|_2 \cdot \|f(x)_{j_0} \|_2 \\
    \leq & ~ R^2 \cdot R_0 \|x - \wt{x} \|_2
\end{align*}
where the first step follows from definition of $G_{1,1}$, the second step is due to Fact~\ref{fac:matrix_norm}, and the third step is based on combining Lemma~\ref{lem:upper_bound}, Lemma~\ref{lem:basic_lips}, and the lemma in Section~\ref{sub:lips_H_xy:upper_bound}.

Also, we have 
\begin{align*}
    \|G_{1,2} \| = & ~ 
    \|( f(\wt{x})_{j_0} \circ h(y)_{i_0} ) f(x)_{j_0}^\top - ( f(\wt{x})_{j_0} \circ h(\wt{y})_{i_0} ) f(x)_{j_0}^\top\| \\
    \leq & ~ \|f(\wt{x})_{j_0} \|_2 \cdot \|h(y)_{i_0} - h(\wt{y})_{i_0} \|_2 \cdot \|f(x)_{j_0} \|_2 \\
    \leq & ~ R\|y -\wt{y} \|_2
\end{align*}
where the first step is based on definition of $G_{1,2}$, the second step is because of Fact~\ref{fac:matrix_norm}, and the third step follows from Lemma~\ref{lem:upper_bound:y}.

Additionally, 
\begin{align*}
    \|G_{1,3} \| = & ~ \|( f(\wt{x})_{j_0} \circ h(\wt{y})_{i_0} ) f(x)_{j_0}^\top - ( f(\wt{x})_{j_0} \circ h(\wt{y})_{i_0} ) f(\wt{x})_{j_0}^\top\| \\
    \leq & ~  \|f(\wt{x})_{j_0} \|_2 \cdot \|h(\wt{y})_{i_0} \|_2 \cdot \|f(x)_{j_0} - f(\wt{x})_{j_0} \|_2 \\
    \leq & ~ R^2 \cdot R_0\|x - \wt{x}\|_2
\end{align*}
where the first step follows from the definition of $G_{1,3}$, the second step follows from Fact~\ref{fac:matrix_norm}, and the third step is because of Lemma~\ref{lem:basic_lips}.

Combining all the above equations we complete the proof.
\end{proof}

\subsection{Calculation: Step 2 Lipschitz for Matrix Function \texorpdfstring{$- \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top$}{}}\label{sub:lips_H_xy:step2}


In this section, we calculate the Lipschitz for $- \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top$.


\begin{lemma}\label{lem:lipschitz_xy_G2}
If the following conditions hold
\begin{itemize}
    \item Let $\alpha(x)_{j_0} \in \R$ be defined as Definition~\ref{def:alpha}
    \item Let $f(x)_{j_0} \in \R^n$ be defined as Definition~\ref{def:f}
    \item Let $c(x,y)_{j_0,i_0} \in \R$ be defined as Definition~\ref{def:c}
    \item Let $\gamma(x)_{j_0} = \langle f(x)_{j_0}, v \rangle \in \R$
    \item $\| A_1 \|, \| A_2 \| , \| A_3 \| \leq R$, $\| \A_{j_0} \| \leq R$, $\| x \|_2 \leq R$,$| b_{j_0,i_0} | \leq R$, $\| v \|_2 \leq R^2$
    \item Let $R \geq 4$
    \item Let $R_0$ be defined as Definition~\ref{def:R_0}.
    \item Let $G_2(x,y) = - \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top$ 
\end{itemize}
Then, we have
\begin{align*}
    \|G_2(x,y) - G_2( \wt{x}, \wt{y} ) \| \leq 3R^2 R_0 (\|x -\wt{x} \|_2 + \| y -\wt{y} \|_2)
\end{align*}
\end{lemma}
\begin{proof}
    We define
    \begin{align*}
        G_{2,1} = & ~ - \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top - (- \langle f(\wt{x})_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top) \\
        G_{2,2} = & ~ - \langle f(\wt{x})_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top - (- \langle f(\wt{x})_{j_0}, h(\wt{y})_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top)\\
        G_{2,3} = & ~ 
        - \langle f(\wt{x})_{j_0}, h(\wt{y})_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top - (- \langle f(\wt{x})_{j_0}, h(\wt{y})_{i_0} \rangle f(\wt{x})_{j_0} f(x)_{j_0}^\top) \\
        G_{2,4} = & ~ 
        - \langle f(\wt{x})_{j_0}, h(\wt{y})_{i_0} \rangle f(\wt{x})_{j_0} f(x)_{j_0}^\top - (- \langle f(\wt{x})_{j_0}, h(\wt{y})_{i_0} \rangle f(\wt{x})_{j_0} f(\wt{x})_{j_0}^\top)
    \end{align*}

We have 
\begin{align*}
    \|G_{2,1} \| = & ~ \|- \langle f(x)_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top - (- \langle f(\wt{x})_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top)\|\\
    \leq & ~ \|f(x)_{j_0} - f(\wt{x})_{j_0}  \|_2 \cdot \|h(y)_{i_0} \|_2 \cdot \|f(x)_{j_0} \|_2 \cdot 
    \|f(x)_{j_0} \|_2\\
    \leq & ~ R^2 \cdot R_0 \| x- \wt{x} \|_2 
\end{align*}
where the first step is based on the definition of $G_{2,1}$, the second step follows from Fact~\ref{fac:circ_rules}, and the third step is because of Lemma~\ref{lem:upper_bound}.

and 
\begin{align*}
    \|G_{2,2} \| = & ~ \|- \langle f(\wt{x})_{j_0}, h(y)_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top - (- \langle f(\wt{x})_{j_0}, h(\wt{y})_{i_0} \rangle f(x)_{j_0} f(x)_{j_0}^\top)\| \\
    \leq & ~ \| f(\wt{x})_{j_0}  \|_2 \cdot 
 \|h(y)_{i_0} -h(\wt{y})_{i_0} \|_2 \cdot \|f(x)_{j_0} \|_2 \cdot 
    \|f(x)_{j_0} \|_2 \\
    \leq & ~ R \|y -\wt{y} \|_2
\end{align*}
where the first step is due to the definition of $G_{2,2}$, the second step is based on Fact~\ref{fac:circ_rules}, and the third step follows from Lemma~\ref{lem:upper_bound:y}.

Similarly, we have
\begin{align*}
    \|G_{2,3} \| \leq & ~ R^2 \cdot R_0 \| x- \wt{x} \|_2 \\
    \|G_{2,4} \| \leq & ~ R^2 \cdot R_0 \| x- \wt{x} \|_2 
\end{align*}

Combining all the above equations we complete the proof.
\end{proof}

\subsection{Calculation: Step 3 Lipschitz for Matrix Function \texorpdfstring{$- c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} )$}{}}\label{sub:lips_H_xy:step3}

In this section, we calculate the Lipschitz for $- c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} )$.



\begin{lemma}\label{lem:lipschitz_xy_G3}
If the following conditions hold
\begin{itemize}
    \item Let $\alpha(x)_{j_0} \in \R$ be defined as Definition~\ref{def:alpha}
    \item Let $f(x)_{j_0} \in \R^n$ be defined as Definition~\ref{def:f}
    \item Let $c(x,y)_{j_0,i_0} \in \R$ be defined as Definition~\ref{def:c}
    \item Let $\gamma(x)_{j_0} = \langle f(x)_{j_0}, v \rangle \in \R$
    \item $\| A_1 \|, \| A_2 \| , \| A_3 \| \leq R$, $\| \A_{j_0} \| \leq R$, $\| x \|_2 \leq R$,$| b_{j_0,i_0} | \leq R$, $\| v \|_2 \leq R^2$
    \item Let $R \geq 4$
    \item Let $R_0$ be defined as Definition~\ref{def:R_0}.
    \item Let $G_3(x,y) = - c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} )$ 
\end{itemize}
Then, we have
\begin{align*}
    \|G_3(x,y) - G_3( \wt{x}, \wt{y} ) \| \leq 3R^2 \cdot R_0  (\|x -\wt{x} \|_2 +  \| y -\wt{y} \|_2)
\end{align*}
\end{lemma}
\begin{proof}
    We define
    \begin{align*}
        G_{3,1} = & ~
        - c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} ) - (- c(\wt{x},y)_{j_0,i_0} \diag( f(x)_{j_0} ))\\
        G_{3,2} = & ~
        - c(\wt{x},y)_{j_0,i_0} \diag( f(x)_{j_0} ) - (- c(\wt{x},\wt{y})_{j_0,i_0} \diag( f(x)_{j_0} )) \\
        G_{3,3} = & ~
        - c(\wt{x},\wt{y})_{j_0,i_0} \diag( f(x)_{j_0} ) - (- c(\wt{x},\wt{y})_{j_0,i_0} \diag( f(\wt{x})_{j_0} ))
    \end{align*}

For $G_{3,1}$, we have
\begin{align*}
    \|G_{3,1} \| = & ~  \|- c(x,y)_{j_0,i_0} \diag( f(x)_{j_0} ) - (- c(\wt{x},y)_{j_0,i_0} \diag( f(x)_{j_0} )) \|\\
    \leq & ~ |c(x,y)_{j_0,i_0} - c(\wt{x},y)_{j_0,i_0} | \cdot \|f(x)_{j_0} \|_2 \\
    \leq & ~ R^2 \cdot R_0\|x - \wt{x} \|_2
\end{align*}
where the first step follows from definition of $G_{3,1}$, the second step is based on Fact~\ref{fac:vector_norm} and the third step is because of Lemma~\ref{lem:upper_bound:y}.

Similarly, we have 
\begin{align*}
    \|G_{3,2} \| \leq & ~  R \|y - \wt{y} \|_2 \\
    \|G_{3,3} \| \leq & ~ 2R^2\cdot R_0 \|x -\wt{x} \|_2
\end{align*}

Combining all the above equations we complete the proof.
\end{proof}

\subsection{Calculation: Step 4 Lipschitz for Matrix Function \texorpdfstring{$c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top$}{}}\label{sub:lips_H_xy:step4}

In this section, we calculate the Lipschitz for $c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top$.


\begin{lemma}\label{lem:lipschitz_xy_G4}
If the following conditions hold
\begin{itemize}
    \item Let $\alpha(x)_{j_0} \in \R$ be defined as Definition~\ref{def:alpha}
    \item Let $f(x)_{j_0} \in \R^n$ be defined as Definition~\ref{def:f}
    \item Let $c(x,y)_{j_0,i_0} \in \R$ be defined as Definition~\ref{def:c}
    \item Let $\gamma(x)_{j_0} = \langle f(x)_{j_0}, v \rangle \in \R$
    \item $\| A_1 \|, \| A_2 \| , \| A_3 \| \leq R$, $\| \A_{j_0} \| \leq R$, $\| x \|_2 \leq R$,$| b_{j_0,i_0} | \leq R$, $\| v \|_2 \leq R^2$
    \item Let $R \geq 4$
    \item Let $R_0$ be defined in Definition~\ref{def:R_0}.
    \item Let $G_4(x,y) = c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top$ 
\end{itemize}
Then, we have
\begin{align*}
    \|G_4(x,y) - G_4( \wt{x}, \wt{y} ) \| \leq 5R^2 \cdot R_0 (\|x -\wt{x} \|_2 + \| y -\wt{y} \|_2)
\end{align*}
\end{lemma}
\begin{proof}
    We define
    \begin{align*}
        G_{4,1} = & ~ c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top - c(\wt{x},y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top \\
        G_{4,2} = & ~ c(\wt{x},y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top - c(\wt{x},\wt{y})_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top \\
        G_{4,3} = & ~ c(\wt{x},\wt{y})_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top - c(\wt{x},\wt{y})_{j_0,i_0} f(\wt{x})_{j_0} f(x)_{j_0}^\top \\
        G_{4,4} = & ~ c(\wt{x},\wt{y})_{j_0,i_0} f(\wt{x})_{j_0} f(x)_{j_0}^\top - c(\wt{x},\wt{y})_{j_0,i_0} f(\wt{x})_{j_0} f(\wt{x})_{j_0}^\top
    \end{align*}
For $G_{4,1}$, we have
\begin{align*}
    \|G_{4,1}\| = & ~ \|c(x,y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top - c(\wt{x},y)_{j_0,i_0} f(x)_{j_0} f(x)_{j_0}^\top \|\\
    \leq & ~ |c(x,y)_{j_0,i_0} -c(\wt{x},y)_{j_0,i_0}| \cdot  \|f(x)_{j_0} \|_2 \cdot \|f(x)_{j_0} \|_2 \\
    \leq & ~ R^2 \cdot R_0 \| x- \wt{x} \|_2
\end{align*}
where the first step is due to definition of $G_{4,1}$, the second step is because of Fact~\ref{fac:vector_norm} and the third step follows from Lemma~\ref{lem:upper_bound} and Lemma~\ref{lem:basic_lips}. 

Similarly, we have 
\begin{align*}
    \|G_{4,2} \| \leq & ~R \|y - \wt{y} \|_2 \\
    \|G_{4,3} \| \leq & ~ 2R^2 \cdot R_0 \| x -\wt{x}\|_2 \\
    \|G_{4,4} \| \leq & ~ 2R^2 \cdot R_0 \| x -\wt{x}\|_2
\end{align*}
Combining all the above equations we complete the proof.
\end{proof}