%\documentclass{uai2023} % for initial submission
\documentclass[accepted,onecolumn]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% hyperref makes hyperlinks in the resulting PDF.
% If your build breaks (sometimes temporarily if a hyperlink spans a page)
% please comment out the following usepackage line and replace
% \usepackage{icml2022} with \usepackage[nohyperref]{icml2022} above.
\usepackage{hyperref}
\hypersetup{colorlinks=true,citecolor=blue,linkcolor=blue}

% Attempt to make hyperref and algorithmic work together better:
\newcommand{\theHalgorithm}{\arabic{algorithm}}

% Use the following line for the initial blind version submitted for review:
%\usepackage{icml2022}

% If accepted, instead use the following line for the camera-ready submission:
%\usepackage[accepted]{icml2022}
% For theorems and such
%\usepackage{cite}
%\usepackage[cmex10]{amsmath} % Use the [cmex10] option to ensure complicance
                             % with IEEE Xplore (see bare_conf.tex)

\usepackage{amssymb}
\usepackage{amsmath,mathrsfs,dsfont}
\usepackage{nicefrac}
\usepackage{algorithm}
\usepackage{algorithmicx}
\usepackage{algpseudocode}

\usepackage{booktabs}

\usepackage{color}
\usepackage{enumitem}

\usepackage{array}
\usepackage{graphicx,tikz}
\usepackage[mathscr]{euscript}
\usepackage{amsthm}
\PassOptionsToPackage{square,sort,comma,numbers}{natbib}
%\usepackage{cite}
%\usepackage[numbers,super]{natbib}
%\biboptions{sort&compress}
%\usepackage{calrsfs}
\usepackage{multirow}

\usepackage{bm}
\usepackage{bbm}
\usepackage{color}

%\usepackage{lipsum}

\usepackage{color}
\usepackage{epstopdf}
%\usepackage{subcaption}
%\usepackage{cleveref}
\usepackage{thmtools}
\usepackage{thm-restate}
\usepackage{subfigure}
\usepackage{bbding}
%\usepackage{slashbox}
%\usepackage{algorithmic}
%\usepackage{algorithm}

\newcommand\blfootnote[1]{%
  \begingroup
  \renewcommand\thefootnote{}\footnote{#1}%
  \addtocounter{footnote}{-1}%
  \endgroup
}



% if you use cleveref..
%\usepackage[capitalize,noabbrev]{cleveref}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

\newcommand\dx{\,\mathrm{d}x}
\newcommand\supp{{\rm supp}}
\newcommand{\cA}{\mathcal{A}}
\newcommand{\cJ}{\mathcal{J}}
\newcommand{\cE}{\mathcal{E}}
\newcommand{\cF}{\mathcal{F}}
\newcommand{\cG}{\mathcal{G}}
\newcommand{\cH}{\mathcal{H}}
\newcommand{\cI}{\mathcal{I}}
\newcommand{\cL}{\mathcal{L}}
\newcommand{\cV}{\mathcal{V}}
\newcommand{\cR}{\mathcal{R}}
\newcommand{\cS}{\mathcal{S}}
\newcommand{\cT}{\mathcal{T}}
\newcommand{\cX}{\mathcal{X}}
\newcommand{\cY}{\mathcal{Y}}
\newcommand{\cZ}{\mathcal{Z}}
\newcommand{\cP}{\mathcal{P}}
\newcommand{\cB}{\mathcal{B}}
\newcommand{\cD}{\mathcal{D}}
\newcommand{\cC}{\mathcal{C}}
\newcommand{\cK}{\mathcal{K}}
\newcommand{\cN}{\mathcal{N}}
\newcommand{\cO}{\mathcal{O}}
\newcommand{\calpha}{\mathcal{\alpha}}
\newcommand{\ba}{\mathbf{a}}
\newcommand{\bb}{\mathbf{b}}
\newcommand{\bd}{\mathbf{d}}
\newcommand{\bg}{\mathbf{g}}
\newcommand{\bq}{\mathbf{q}}
\newcommand{\boldm}{\mathbf{m}}
\newcommand{\bu}{\mathbf{u}}
\newcommand{\bv}{\mathbf{v}}
\newcommand{\bw}{\mathbf{w}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bz}{\mathbf{z}}
\newcommand{\be}{\mathbf{e}}
\newcommand{\bA}{\mathbf{A}}
\newcommand{\bB}{\mathbf{B}}
\newcommand{\bD}{\mathbf{D}}
\newcommand{\bE}{\mathbf{E}}
\newcommand{\bF}{\mathbf{F}}
\newcommand{\bG}{\mathbf{G}}
\newcommand{\bR}{\mathbf{R}}
\newcommand{\bS}{\mathbf{S}}
\newcommand{\bT}{\mathbf{T}}
\newcommand{\bU}{\mathbf{U}}
\newcommand{\bp}{{\mathbf{p}}}
\newcommand{\bP}{{\mathbf{P}}}
\newcommand{\bmX}{\bm X}
\newcommand{\bX}{\mathbf{X}}
\newcommand{\bmA}{\bm A}
\newcommand{\bmI}{\bm I}
\newcommand{\bone}{\mathbf 1}
\newcommand{\bzero}{\mathbf 0}
\newcommand{\bH}{{\mathbf{H}}}
\newcommand{\bI}{{\mathbf{I}}}
\newcommand{\bL}{{\mathbf{L}}}
\newcommand{\bW}{{\mathbf{W}}}
\newcommand{\bK}{{\mathbf{K}}}
\newcommand{\bY}{{\mathbf{Y}}}
\newcommand{\bQ}{{\mathbf{Q}}}
\newcommand{\bZ}{{\mathbf{Z}}}
\newcommand{\balpha}{\bm{\alpha}}
\newcommand{\bbeta}{\bm{\beta}}
\newcommand{\field}[1]{\mathbb{#1}}
\newcommand{\gras}[1]{\textbf{#1}}
\newcommand{\N}{{\rm I}\kern-0.18em{\rm N}}
\newcommand{\C}{\field{C}}
\newcommand{\bbR}{\mathbb{R}}
\newcommand{\bbP}{\mathbb{P}}
\newcommand{\h}{{\rm I}\kern-0.18em{\rm H}}
\newcommand{\K}{{\rm I}\kern-0.18em{\rm K}}
\newcommand{\p}{{\rm I}\kern-0.18em{\rm P}}
\newcommand{\E}{{\rm I}\kern-0.18em{\rm E}}
\newcommand{\Z}{{\rm Z}\kern-0.18em{\rm Z}}
\newcommand{\I}{{\math I}}
\newcommand{\Q}{{\math Q}}
\newcommand{\1}{{\rm 1}\kern-0.25em{\rm I}}
\newcommand{\U}{{\bf U}}
\newcommand{\tc}{{\bf t}}
\newcommand{\M}{{\bf M}}
\newcommand{\X}{\field{X}}
\newcommand{\ud}{\mathrm{d}}
\newcommand{\re}{\textrm{Re}}
\newcommand{\im}{\textrm{Im}}
\newcommand{\epr}{\hfill\hbox{\hskip 4pt\vrule width 5pt
                  height 6pt depth 1.5pt}\vspace{0.5cm}\par}
\newcommand{\phin}{\varphi_n}
\newcommand{\phinb}{\overline \varphi_n(t)}
\newcommand{\pn}{\p_{\kern-0.25em n}}
\newcommand{\pnm}{\p_{\kern-0.25em n,m}}
\newcommand{\psubm}{\p_{\kern-0.25em m}}
\newcommand{\e}{\textrm{e}}
\newcommand{\symdiff}{%
  \mathbin{\text{\footnotesize$\bigtriangleup$}}}
\newcommand{\symdiffsmall}{%
  \mathbin{\text{\scriptsize $\bigtriangleup$}}}
\newcommand{\Leb}{\mathrm{Leb}_d}

\def\RR{\mathbb{R}}
\def\defeq {\coloneqq}
\newcommand{\circled}[1]{\small{\raisebox{.6pt}{\textcircled{\raisebox{-.8pt}{#1}}}}}
\newcommand{\stcomp}[1]{\overline{#1}}
\def\prox{\textup {prox}}
\def \supp#1{\textup{supp}(#1)}
\def\set#1{\left\{ #1 \right\}}
\def\pth#1{\left( #1 \right)}
\def\bth#1{\left[ #1 \right]}
\def\abth#1{\left | #1 \right |}
\def \eps  {\epsilon}
\newcommand{\bfm}[1]{\ensuremath{\mathbf{#1}}}
\def\bm{\bfm m}

%equations
\newcommand{\beq}{\begin{equation}}
\newcommand{\eeq}{\end{equation}}
\newcommand{\beqa}{\begin{eqnarray}}
\newcommand{\eeqa}{\end{eqnarray}}
\newcommand{\beqas}{\begin{eqnarray*}}
\newcommand{\eeqas}{\end{eqnarray*}}
\def\bal#1\eal{\begin{align}#1\end{align}}
\def\bals#1\eals{\begin{align*}#1\end{align*}}
\def\bsal#1\esal{\begin{small}\begin{align}#1\end{align}\end{small}}
\def\bsals#1\esals{\begin{small}\begin{align*}#1\end{align*}\end{small}}
\def\bsfal#1\esfal{\begin{small}\begin{flalign}#1\end{flalign}\end{small}}
\newcommand{\nn}{\nonumber}
\def\vgap{\vspace*{.1in}}

%big O notation
\newcommand{\BigO}[1]{{\operatorname{O}}}


\DeclareMathOperator*{\argmin}{arg\,min}

\newtheorem{MyAssumption}{Assumption}
\newtheorem{MyDefinition}{Definition}
\newtheorem{MyLemma}{Lemma}
\newtheorem{MyTheorem}{Theorem}
\newtheorem{MyCorollary}{Corollary}
\newtheorem{MyRemark}{Remark}
\newtheorem{MyAlgorithm}{Algorithm}[section]
\newtheorem{MyMethod}{Method}[section]
\newtheorem{MyProposition}{Proposition}
\newtheorem{MyClaim}{Claim}
\newtheorem{MyObservation}{Observation}


%letter numbered
\newtheorem{MyLemmaX}{Lemma}
\renewcommand{\theMyLemmaX}{\Alph{MyLemmaX}} % "letter-numbered" lemmas
\newtheorem{MyDefinitionX}{Definition}
\renewcommand{\theMyDefinitionX}{\Alph{MyDefinitionX}} % "letter-numbered" lemmas



\def\norm#1#2{{\left\|#1\right\|}_{#2}}

\def\lonenorm#1{\norm{#1}{1}}
\def\ltwonorm#1{\norm{#1}{2}}
%\def\fnorm#1{\Norm{#1}{\textup{F}}}
\def\fnorm#1{\norm{#1}{\textup{F}}}
\def\supnorm#1{\norm{#1}{\infty}}
\def\opnorm#1{\norm{#1}{\textup{OP}}}

\title{Supplementary for Locally Regularized Sparse Graph by Fast Proximal Gradient Descent}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
%\author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2023 paper}{Jane~J.~von~O'L\'opez}{}}
\author[]{Dongfang Sun, Yingzhen Yang}
%\author[1,2]{Further~Coauthor}
%\author[3]{Further~Coauthor}
%\author[1]{Further~Coauthor}
%\author[3]{Further~Coauthor}
%\author[3,1]{Further~Coauthor}
% Add affiliations after the authors
\affil[]{%
School of Computing and Augmented Intelligence\\
Arizona State University, Tempe, AZ 85281, USA \\
\texttt{\{dsun30,yingzhen.yang\}@asu.edu}
}
%\affil[2]{%
%    Second Affiliation\\
%    Address\\
%    …
%}
%\affil[3]{%
%    Another Affiliation\\
%    Address\\
%    …
%  }

\begin{document}
%\onecolumn
\maketitle


\section{Proofs and More Technical Results}
\label{sec:proofs}
\iffalse
\bal\label{eq:srsg-cdi-expand}
&\mathop {\min }\limits_{\bZ^{i} \in \RR^{n}, \bZ_i^i=0} F(\bZ^i) = \ltwonorm{{\bx_i} - {\bX}\bZ^{i}}^2 + \gamma \sum\limits_{k=1}^n c_{ki} \1_{\bZ_k^i \neq 0}.
\eal%

\bal\label{eq:srsg-cdi-simple}
&\mathop {\min }\limits_{\bz \in \RR^n, \bz_i = 0} \tilde F(\bz) = \ltwonorm{{\bx_i} - {\bX}\bz}^2 + \gamma \sum\limits_{k \colon 1 \le k \le n, c_{ki} > 0} c_{ki} \1_{\bz_k \neq 0},
\eal%

$$h_{\gamma,c}(\bz) \triangleq \gamma \sum\limits_{k \colon 1 \le k \le n, c_{ki} > 0} c_{ki} \1_{\bz_k \neq 0}.$$
\fi

\begin{proposition}\label{proposition:srsg-cdi-simple}
Define $\cC^{+} = \{t \colon 1 \le t \le n, c_{ti} > 0\}$, and $\cC^{-} = \{t \colon 1 \le t \le n, c_{ti} < 0\}$.
Let $\bz^*$ be a critical point of the function $\tilde F$ in eq.(7) of the main paper. For an arbitrarily small positive number $\varepsilon > 0$,
let $\tilde \bz^{*,\eps} \in \RR^n$ be defined by
\bal\label{eq:proposition-tilde-z-eps}
\tilde \bz_k^{*,\eps} = \begin{cases}
\bz^*_k &\textup{if } \bz_k^* \neq 0 \textup{ or } k \in \cC^{+} \\
\varepsilon &\textup{otherwise}
\end{cases}
\eal
Then there exists $\bu \in \tilde \partial F(\tilde \bz^{*,\eps})$ for $F$ in eq.(6) of the main paper such that $\ltwonorm{\bu} \le L_f |\cC^{-}|\varepsilon$, where $L_f \defeq 2 \sigma_{\max}(\bX^{\top} \bX)$.
\end{proposition}
\begin{proof}
Since the only elements that differ between $\tilde \bz^{*,\eps}$ and $\bz^*$ are those with indices in $\cA = \cC^{-} \cap \{k \colon \bz_k^* = 0\}$, we have
\bals
\ltwonorm{\nabla f(\tilde \bz^{*,\eps}) - \nabla f(\bz^*)} \le L_f \ltwonorm{\tilde \bz^{*,\eps} - \bz^*} \le L_f |\cC^{-}|  \varepsilon,
\eals
where $L_f = 2 \sigma_{\max}(\bX^{\top} \bX)$. Because $\bz^*$ is a critical point of the function $\tilde F$, there exists $\bq \in \tilde \partial h_{\gamma,c}(\bz^*)$ such that $\bp \defeq \nabla f(\bz^*) + \bq = \bzero$. Define $\tilde h_{\gamma,c} = \gamma \sum\limits_{k=1}^n c_{ki} \1_{\bZ_k^i \neq 0}$. With the definition of $\tilde \bz^{*,\eps}$, we have $\tilde \bq \in \tilde \partial \tilde h_{\gamma,c}(\tilde \bz^{*,\eps})$ such that $\tilde \bq_k = 0$ for $k \in \cA$ and $\tilde \bq_k = \bq_k$ otherwise.
Moreover, $\bq_k = 0$ for all $k \in \cA$.

Therefore, letting $\tilde \bp \triangleq \nabla f(\tilde \bz^{*,\eps}) + \tilde \bq \in \tilde \partial F(\tilde \bz^{*,\eps})$, we have
\bals
\ltwonorm{\tilde \bp} = \ltwonorm{\tilde \bp - \bp} =  \ltwonorm{\nabla f(\tilde \bz^{*,\eps}) - \nabla f(\bz^*)} \le L_f |\cC^{-}|  \varepsilon.
\eals
The claim of this proposition follows with $\bu = \tilde \bp$.


\end{proof}
%It follows that the sequence $\{F({\bZ^i}^{(t)})\}_{t}$ converges as a sequence indexed by $t$ for each 1 $\le i \le n$, so the PGD method converges.

We restate the key equations of the main paper and introduce additional notation before presenting the proof of Theorem 3.2.

\bals
\prox_{s h_{\gamma,c}}(\bu) \defeq \argmin\limits_{\bv \in \RR^{n},\bv_i = 0} {\frac{1}{2s}\ltwonorm{\bv - \bu}^2 + h_{\gamma,c}(\bv)} =
T_{s,\gamma,c}(\bu),
\eals%
where $s>0$ is the step size and $T_{s,\gamma,c}$ is an element-wise hard thresholding operator. For $1 \le t \le n$,
\bal\label{eq:T-thresholding}
        &[T_{s,\gamma,c}(\bu)]_t=
        \left\{
        \begin{array}
                {r@{\quad:\quad}l}
                0 & {\abth{\bu_t} \le \sqrt{2s \gamma c_{ti}} \,\, {\rm and} \,\, c_{ti} >0, \,\, {\rm or} \,\, t = i  } \\
                {\bu_t} & {\rm otherwise}
        \end{array}
        \right.
\eal%

\subsection{Proof of Theorem 3.2}

\iffalse
We need the following lemma.

\begin{lemma}\label{lemma:fpgd-bounded-gradient-supp-shrink}
Let $\{{\bz}^{(m)}\}$ be the sequence generated by Algorithm~(\ref{alg:fpgd-sp}), and the sequence $\{\lambda_k\}$ satisfy $\lambda_{k+1} \ge \frac{k+1}{k} \lambda_k$ for all $k \ge 1$. Then for any $k \ge 1$, $\lambda_k$ can be chosen such that $\supp(\bv_{\cC}^{(k)}) = \supp_{\cC}(\bz_{\cC}^{(k)})$. Moreover, there exists a constant $G' = M_{s_0}$ such that
$\ltwonorm{\nabla f(\bm^{(k)})} \le G'$, and
$\supp{{\bz_{\cC}}^{(k)}} \subseteq \supp{{\bz_{\cC}}^{(k-1)}}$ for all $m \ge 1$.
\end{lemma}
\begin{proof}
It follows by (\ref{eq:convex-zk-1}) and (\ref{eq:convex-zk-2}) in Algorithm~\ref{alg:fpgd-sp}
that $\tilde F(\bz^{(k)}) \le \tilde F(\bz^{(k-1)})$ for all $k \ge 1$. It follows that
\bal\label{eq:fpgd-bounded-gradient-supp-shrink-seg1}
f(\bz^{(k)}) \le \tilde F(\bz^{(k)}) \le \tilde F(\bz^{(0)}).
\eal

As a result, combined with the $L_f$ smoothness of $f$, we have
\bal
\ltwonorm{\nabla f(\bz^{(k)}) } &\le 2 \norm{\bX}{2} \sqrt{\tilde F(\bz^{(0)})}, \forall k \ge 1 \label{eq:fpgd-bounded-gradient-supp-shrink-seg2}.
\eal

Also,
\bal\label{eq:fpgd-bounded-gradient-supp-shrink-seg3}
\ltwonorm{\nabla f(\bm^{(k)}) } &\le 2 \norm{\bX}{2} \ltwonorm{{\bx_i} - {\bX}\bm^{(k)}}
\nonumber \\
&\le 2 \norm{\bX}{2} \pth{(1-\alpha_k) \ltwonorm{{\bx_i} - {\bX}\bz^{(k-1)}} + \alpha_k \ltwonorm{{\bx_i} - {\bX}\bv^{(k-1)}} } \nonumber \\
&\le \underbrace{2 \norm{\bX}{2} \sqrt{\tilde F(\bz^{(0)})} + 2 \norm{\bX}{2}\ltwonorm{\bx_i} }_{\defeq c_1}+ 2 \norm{\bX}{2}^2 \alpha_k \ltwonorm{\bv^{(k-1)}} \nonumber \\
&=c_1 + 2 \norm{\bX}{2}^2 \alpha_k \ltwonorm{\bv^{(k-1)}}.
\eal

According to (\ref{eq:convex-tilde-vk}) and (\ref{eq:convex-vk}) in
Algorithm~(\ref{alg:fpgd-sp}), we have
\bal\label{eq:fpgd-bounded-gradient-supp-shrink-seg4}
\ltwonorm{\bv^{(k)}} &\le \ltwonorm{\tilde \bv^{(k)}} \nonumber \\
&\le \ltwonorm{\bv^{(k-1)} - \lambda_k \nabla f(\bm^{(k)})} \nonumber \\
&\le \ltwonorm{\bv^{(k-1)}} + \eta k \ltwonorm{\nabla f(\bm^{(k)}) } \nonumber \\
&\le \ltwonorm{\bv^{(k-1)}} + \eta c_1  k +
\eal



We prove this lemma by mathematical induction that there exists a constant $G'$ such that $\|\nabla f(\bm^{(m)})\|_2 \le G'$, and $\supp ({\bz}^{(m)}) \subseteq \supp ({\bz}^{(m-1)})$ for all $m \ge 1$, $\lambda_k$ can be chosen such that $\supp(\bv_{\cC}^{(k)}) = \supp_{\cC}(\bx_{\cC}^{(k)})$. At the initialization with $k=1$, $\bm^{(k)} = (1 - \alpha_k) \bz^{(k-1)} + \alpha_k \bv^{(k-1)}$. Define $\tilde \bz^{(k)} \triangleq \bm^{(k)} - s \nabla f(\bm^{(k)})$.

\bal
&\bz^{(k)} = {\rm prox}_{s h_{\gamma,c} }(\bm^{(k)} - s \nabla f(\bm^{(k)})) = T_{s,\gamma,c}(\tilde \bz^{(k)}).
\eal%

Suppose $\bm_j^{(k)} = 0$, then $\tilde \bz_j^{(k)} = -s \nabla [f(\bm^{(k)})]_j$. By (\ref{eq:T-thresholding}), $\bz_j^{(k)} = T_{s,\gamma,c}(\tilde \bz_j^{(k)})$. If $j = i$, then $\bz_j^{(k)} = 0$. For $j \neq i$, $\bz_j^{(k)} = 0$ if $\tilde \bz_i^{(k)} \le \sqrt{2s \gamma c_{ki}}$ if $j \in \cC$, $\bz_j^{(k)} = 0$ if $j \neq \cC$.

It can be verified that $\tilde \bz_i^{(k)} \le \sqrt{2s \gamma c_{ki}}$ with $\tilde \bz_j^{(k)} = -s \nabla [f(\bm^{(k)})]_j$ and $s \le \frac{2\tau}{\|\nabla f(\bm^{(k)})\|_2^2}$.

Therefore, $\bz_j^{(k)} = 0$ if $j \in \cC$ and $\bm_j^{(k)} = 0$. Because $\supp ({\bz_{\cC}}^{(k-1)}) = \supp ({\bm_{\cC}}^{(k)})$, $\supp ({\bz_{\cC}}^{(k)}) \subseteq \supp ({\bz_{\cC}}^{(k-1)})$. Define $\tilde \bv^{(k)} = \bv^{(k-1)} - \nabla f(\bm^{(k)})$. Since $\bz^{(k-1)} = \bv^{(k-1)}$, we can choose $\lambda_k > 0$ such that $\tilde \bv_j^{(k)} \neq 0$ for all $j \in \cC \bigcap \supp(\bz^{(k)})$, it follows that $\supp(\bv_{\cC}^{(k)}) = \supp_{\cC}(\bx_{\cC}^{(k)})$. To sum up, we have $\supp ({\bz_{\cC}}^{(k)}) \subseteq \supp ({\bz_{\cC}}^{(k-1)})$ and there exists $\lambda_1 > 0$ such that $\supp(\bv_{\cC}^{(k)}) = \supp_{\cC}(\bx_{\cC}^{(k)})$ for $k=1$.

Suppose that $\supp ({\bz_{\cC}}^{(k)}) \subseteq \supp ({\bz_{\cC}}^{(k-1)})$ and there exists $\lambda_k > 0$ such that $\supp(\bv_{\cC}^{(k)}) = \supp_{\cC}(\bz_{\cC}^{(k)})$ for all $k \le k'$. We have $\|\nabla f(\bm^{(m)})\|_2 \le G'$ for all $k \le k'$. By setting $s = \min\{\frac{2\tau}{G'^2}, c_0 L_f\}$ and (\ref{eq:T-thresholding}), it can be verified that $\supp ({\bz_{\cC}}^{(k)}) \subseteq \supp ({\bz_{\cC}}^{(k-1)})$ for $k = k'+1$ and we still have $\|\nabla f(\bm^{(k'+1)})\|_2 \le G'  = M_{s_0}$. Therefore, the claims holds.
\end{proof}
\fi


\begin{proof}[\textup{\bf Proof of Theorem 3.2}]
First of all, it can be verified that $\supp{{\bz_{\cC}}^{(k)}} \subseteq \supp{{\bz_{\cC}}^{(k-1)}}$ for all $k \ge 1$ when $s < \frac{2\tau}{G^2}$. Since these nested supports are subsets of a finite set, there exists a finite $k' \ge 1$ such that all elements of $\set{{\bz_{\cC}}^{(k)}}_{k \ge k'}$ have the same support $\cS$. We note that $\lambda_k$ can also be slightly adjusted so that $\supp{\bv_{\cC}^{(k)}} = \cS$ for all $k > k'$. Now we consider any $k > k'$ in the sequel, and let $\bz \in \RR^n$ be a vector such that $\supp{\bz_{\cC}} = \cS$.

Because $f$ has an $L_f$-Lipschitz continuous gradient, we have
\bal\label{eq:convex-theorem-seg1}
f(\bz^{(k)}) &\le f(\bm^{(k)}) + \langle \nabla f(\bm^{(k)}), \bz^{(k)}-\bm^{(k)} \rangle + \frac{L_f}{2} \ltwonorm{\bz^{(k)}-\bm^{(k)}}^2.
%\nonumber \\
%&= f(\bm^{(k)}) + \langle \nabla f(\bm^{(k)}), \bz^{(k)}-\bm^{(k)} \rangle + \frac{L_f s^2}{2} \ltwonorm{\bG^{(k)}}^2.
\eal%

%\bal\label{eq:convex-theorem-seg2}
%h_{\gamma,c}(g(\bz^{(k)})) &\le h_{\gamma,c}(g(\bm^{(k)})) + \langle \nabla h_{\gamma,c}(g(\bm^{(k)})), g(\bz^{(k)})-g(\bm^{(k)}) \rangle \nonumber \\
%&\mathrel{\phantom{=}}+ \frac{L_h}{2} \ltwonorm{g(\bz^{(k)})-g(\bm^{(k)})}^2.
%\eal%
%%Because $\bv^{(k-1)}$ and $\bz^{(k-1)}$ are on the same piece due to the projection operation (\ref{eq:convex-yk}), $\bm^{(k)}$ is also on that piece since it is an interpolation of $\bv^{(k-1)}$ and $\bz^{(k-1)}$. As a result,
%Due to the convexity of $g$, nonnegativity of $\nabla h_{\gamma,c}$, and the Lipschitz continuity of $g$, we have
%\bal\label{eq:convex-theorem-seg3-1}
%&\langle \nabla h_{\gamma,c}(g(\bm^{(k)})), g(\bz^{(k)})-g(\bm^{(k)}) \rangle \nonumber \\
%&\le \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \nabla g(\bz^{(k)}), \bz^{(k)} - \bm^{(k)} \rangle \nonumber \\
%&\le \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \pth{ \nabla g(\bm^{(k)}) + \nabla g(\bz^{(k)}) - \nabla g(\bm^{(k)})}, \bz^{(k)} - \bm^{(k)} \rangle  \nonumber \\
%&\le \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \nabla g(\bm^{(k)}), \bz^{(k)} - \bm^{(k)} \rangle \nonumber \\
%&\mathrel{\phantom{=}}+ \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \pth{\nabla g(\bz^{(k)}) - \nabla g(\bm^{(k)})}, \bz^{(k)} - \bm^{(k)} \rangle \nonumber \\
%&\le \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \nabla g(\bm^{(k)}), \bz^{(k)} - \bm^{(k)} \rangle \nonumber \\
%&\mathrel{\phantom{=}}+ \ltwonorm{\nabla h_{\gamma,c}(g(\bm^{(k)}))} \ltwonorm{\nabla g(\bz^{(k)}) - \nabla g(\bm^{(k)})}} \ltwonorm{\bz^{(k)} - \bm^{(k)}} \nonumber \\
%&\le \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \nabla g(\bm^{(k)}), \bz^{(k)} - \bm^{(k)} \rangle + H L_g \ltwonorm{\bz^{(k)} - \bm^{(k)}}^2 \nonumber \\
%&\stackrel{\circled{1}}{\le} \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \nabla g(\bm^{(k)}), \bz^{(k)} - \bm^{(k)} \rangle + HL_g \ltwonorm{\bm^{(k)} - \bm^{(k)}}^2 \nonumber \\
%&\stackrel{\circled{2}}{\le} \langle \nabla h_{\gamma,c}(g(\bm^{(k)})) \cdot \nabla g(\bm^{(k)}), \bz^{(k)} - \bm^{(k)} \rangle + HL_g s^2 \ltwonorm{\bG^{(k)}}^2,
%\eal%
%where $\circled{1}$ is due to the fact that
%\begin{align*}
%\ltwonorm{\bz^{(k)} - \bm^{(k)}} &= \ltwonorm{\bbP_{\bm^{(k)}}\pth{\bm^{(k)}} - \bm^{(k)}} \nonumber \\
%&= \ltwonorm{\bbP_{\bm^{(k)}}\pth{\bm^{(k)}} - \bbP_{\bm^{(k)}} \pth{\bm^{(k)}}} = \ltwonorm{\bbP_{\bm^{(k)}}\pth{\bm^{(k)}) - \bm^{(k)}}} \nonumber \\
%&\le \ltwonorm{\bm^{(k)}) - \bm^{(k)}}.
%\end{align*}%
Also,
\bal\label{eq:convex-theorem-seg2}
&f(\bm^{(k)}) - (1-\alpha_k)f(\bz^{(k-1)}) - \alpha_k f(\bz) \nonumber \\
&= (1-\alpha_k) \pth{ f(\bm^{(k)}) - f(\bz^{(k-1)}) } + \alpha_k \pth{ f(\bm^{(k)}) - f(\bz) } \nonumber \\
&\stackrel{\circled{1}}{\le} (1-\alpha_k) \langle \nabla f(\bm^{(k)}), \bm^{(k)} - \bz^{(k-1)} \rangle + \alpha_k \langle \nabla f(\bm^{(k)}), \bm^{(k)} - \bz \rangle \nonumber \\
&= \langle \nabla f(\bm^{(k)}), (1-\alpha_k) (\bm^{(k)} - \bz^{(k-1)}) + \alpha_k (\bm^{(k)} - \bz)  \rangle \nonumber \\
&= \langle \nabla f(\bm^{(k)}),  \bm^{(k)} - (1-\alpha_k) \bz^{(k-1)}  - \alpha_k \bz \rangle,
%\nonumber \\ &=\alpha_k \langle \nabla f(\bm^{(k)}),  \bv^{(k-1)}-\bz \rangle
\eal
where $\circled{1}$ is due to the convexity of $f$.

%By (\ref{eq:convex-mk}), $\bm^{(k)} - \bm^{(k-1)} = \alpha_k (\bv^{(k-1)} - \bm^{(k-1)})$. Furthermore, since  $\bm^{(k-1)} = \bbP_{\bv^{(k-1)}}(\bz^{(k-1)})$ by (\ref{eq:convex-uk}), and $\bv^{(k-1)} = \bbP_{\bv^{(k-1)}}(\bv^{(k-1)})$ due to the definition of support projection operator, we have
%\bal\label{eq:convex-theorem-seg2-1}
%\ltwonorm{\bv^{(k-1)} - \bm^{(k-1)}}^2 &= \ltwonorm{\bbP_{\bv^{(k-1)}}(\bv^{(k-1)}) - \bbP_{\bv^{(k-1)}}(\bz^{(k-1)})}^2 \nonumber \\
%&=\ltwonorm{\bbP_{\bv^{(k-1)}}(\bv^{(k-1)}-\bz^{(k-1)})}^2 \nonumber \\
%&\stackrel{\circled{1}}{\le} \ltwonorm{\bv^{(k-1)}-\bz^{(k-1)}}^2.
%\eal
%$\circled{1}$ is due to the fact that $\ltwonorm{\bbP_{\bm}(\bv)} \le \ltwonorm{\bv}$ for any two vectors $\bm$ and $\bv$ of the same size.

We have $\tilde \bv^{(k)} = \bv^{(k-1)} - \lambda_k \nabla f(\bm^{(k)})$, and it follows that
\bal \label{eq:convex-theorem-seg3-1}
&\frac{1}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 - \ltwonorm{\bv^{(k)} - \bv^{(k-1)}}^2 } \nonumber \\
&= \frac{1}{\lambda_k} \langle \bz-\bv^{(k)}, \bv^{(k)} - \bv^{(k-1)}\rangle \nonumber \\
% &= \frac{1}{\lambda_k} \langle \bz-\bv^{(k)}, \bbP_{(\cC \cap \supp{\bz^{(k)}}) \cup \stcomp{\cC}}(\tilde \bv^{(k)}) - \bv^{(k-1)}\rangle \nonumber \\
% &= \frac{1}{\lambda_k} \langle \bz-\bv^{(k)}, \bbP_{(\cC \cap \supp{\bz^{(k)}}) \cup \stcomp{\cC}}(\tilde \bv^{(k)}) - \bbP_{(\cC \cap \supp(\bz^{(k-1)})) \cup \stcomp{\cC}}(\bv^{(k-1)}) \rangle \nonumber \\
% &= \frac{1}{\lambda_k} \langle \bz-\bv^{(k)}, \bbP_{(\cC \cap \supp{\bz^{(k)}}) \cup \stcomp{\cC}}(\tilde \bv^{(k)} - \bv^{(k-1)}) \rangle \nonumber \\
% &=\frac{1}{\lambda_k} \pth{\langle \bbP_{(\cC \cap \supp(\bz^{(k)}))}(\bz-\bv^{(k)}), \bbP_{(\cC \cap \supp(\bz^{(k)}))}(\tilde \bv^{(k)} - \bv^{(k-1)}) \rangle +
% \langle \bbP_{\stcomp{\cC}}(\bz-\bv^{(k)}), \bbP_{\stcomp{\cC}} (\tilde \bv^{(k)} - \bv^{(k-1)}) \rangle } \nonumber \\
% &\stackrel{\circled{1}}{=}\frac{1}{\lambda_k} \pth{ \langle \bbP_{\cC}(\bz-\bv^{(k)}), \bbP_{\cC}(\tilde \bv^{(k)} - \bv^{(k-1)}) \rangle +
% \langle \bbP_{\stcomp{\cC}}(\bz-\bv^{(k)}), \bbP_{\stcomp{\cC}} (\tilde \bv^{(k)} - \bv^{(k-1)}) \rangle } \nonumber \\
% &=\frac{1}{\lambda_k}  \langle \bz-\bv^{(k)}, \tilde \bv^{(k)} - \bv^{(k-1)} \rangle \nonumber \\
&\stackrel{\circled{1}}{=} \frac{1}{\lambda_k}\langle \bz-\bv^{(k)}, \tilde \bv^{(k)} - \bv^{(k-1)} \rangle\nonumber \\
&= \langle \nabla f(\bm^{(k)}),  \bv^{(k)}-\bz \rangle,
\eal%
where $\circled{1}$ is due to the fact that $\supp{\bz_{\cC} - \bv_{\cC}^{(k)} } \subseteq \cS$, because $\supp{\bz_{\cC}} = \cS$ and $\supp{\bv_{\cC}^{(k)}} \subseteq \cS$.

Because $\supp{\bv_{\cC}^{(k)}} \subseteq \supp{\bz_{\cC}}$, we have
\bal
h_{\gamma,c}(\bv^{(k)}) \le h_{\gamma,c}(\bz). \label{eq:convex-theorem-seg3-2}
\eal%

It follows by (\ref{eq:convex-theorem-seg3-1}) and (\ref{eq:convex-theorem-seg3-2})
that
\bal \label{eq:convex-theorem-seg3}
&\langle \nabla f(\bm^{(k)}), \bv^{(k)}-\bz \rangle + h_{\gamma,c}(\bv^{(k)}) \nonumber \\
&\le h_{\gamma,c}(\bz)
+ \frac{1}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 - \ltwonorm{\bv^{(k)} - \bv^{(k-1)}}^2 }.
\eal%

%Adding (\ref{eq:convex-theorem-seg3-1}) and (\ref{eq:convex-theorem-seg3-2}), we have
Similar to (\ref{eq:convex-theorem-seg3-1}),  we have
\bal \label{eq:convex-theorem-seg4-1}
\frac{1}{2 s} \pth{ \ltwonorm{\bm^{(k)} - \bz}^2 -  \ltwonorm{\bz^{(k)} - \bz}^2 - \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 }
&= \frac{1}{s} \langle \bz-\bz^{(k)}, \bz^{(k)}- \bm^{(k)}\rangle.
\eal%

For any $\bq \in \partial h_{\gamma,c}(\bz^{(k)})$, due to the fact that $\supp{\bz_{\cC}} = \supp{\bz_{\cC}^{(k)}}$,
\bal \label{eq:convex-theorem-seg4-2}
&\langle \bz-\bz^{(k)}, \bq \rangle + h_{\gamma,c}(\bz^{(k)}) = h_{\gamma,c}(\bz).
\eal%

By (\ref{eq:convex-theorem-seg4-1}) and (\ref{eq:convex-theorem-seg4-2}),
\bal \label{eq:convex-theorem-seg4-3}
&\langle \bz- \bz^{(k)}, \frac{1}{s} (\bz^{(k)}- \bm^{(k)}) + \bq \rangle + h_{\gamma,c}(\bz^{(k)}) \nonumber \\
&= h_{\gamma,c}(\bz) + \frac{1}{2 s} \pth{ \ltwonorm{\bm^{(k)} - \bz}^2 -  \ltwonorm{\bz^{(k)}- \bz}^2 - \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 }.
\eal%

By the optimality condition of the proximal mapping in eq.(10) in Algorithm 1, we can choose $\bq \in \partial h_{\gamma,c}(\bz^{(k)})$ such that $\bz^{(k)}= \bm^{(k)} - s \pth{ \nabla f(\bm^{(k)}) + \bq }$. Plugging such $\bq$ in (\ref{eq:convex-theorem-seg4-3}), we have
\bal \label{eq:convex-theorem-seg4}
\langle \nabla f(\bm^{(k)}), \bz^{(k)}-\bz \rangle + h_{\gamma,c}(\bz^{(k)}) &= h_{\gamma,c}(\bz) + \frac{1}{2 s} \pth{ \ltwonorm{\bm^{(k)} - \bz}^2 -  \ltwonorm{\bz^{(k)}- \bz}^2 - \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 }.
\eal%

%\footnote{ Because $\supp(\bz_{\cC}^{(k-1)}) = \supp(\bv_{\cC}^{(k)})$, %we always have $\supp(\bz_{\cC}) \subseteq \supp(\bz_{\cC}^{(k-1)})$. If %$\supp(\bz_{\cC}) \subset \supp(\bz_{\cC}^{(k-1)})$, $\lambda_k$ can be %adjusted by an arbitrary small amount so make sure that $\supp(\bz_{\cC}) %= \supp(\bz_{\cC}^{(k-1)})$. }

Setting $\bz = (1-\alpha_k)\bz^{(k-1)} + \alpha_k \bv^{(k)}$
in (\ref{eq:convex-theorem-seg4}), we have
\bal \label{eq:convex-theorem-seg5}
&\langle \nabla f(\bm^{(k)}), \bz^{(k)}- (1-\alpha_k)\bz^{(k-1)} - \alpha_k \bv^{(k)} \rangle + h_{\gamma,c}(\bz^{(k)}) \nonumber \\
&\le h_{\gamma,c}((1-\alpha_k)\bz^{(k-1)} + \alpha_k \bv^{(k)}) + \frac{1}{2 s} \pth{ \ltwonorm{\bm^{(k)} - (1-\alpha_k)\bz^{(k-1)}- \alpha_k \bv^{(k)}}^2 - \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 } \nonumber \\
&\stackrel{\circled{1}}{\le} (1-\alpha_k) h_{\gamma,c} (\bz^{(k-1)}) + \alpha_k h_{\gamma,c} (\bv^{(k)}) \nonumber \\
&\phantom{=}+ \frac{1}{2 s} \pth{ \ltwonorm{\bm^{(k)} - (1-\alpha_k)\bz^{(k-1)}- \alpha_k \bv^{(k)}}^2  - \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 } \nonumber \\
&\stackrel{\circled{2}}{\le} (1-\alpha_k) h_{\gamma,c} (\bz^{(k-1)}) + \alpha_k h_{\gamma,c} (\bv^{(k)}) + \frac{1}{2 s} \pth{ \alpha_k^2 \ltwonorm{\bv^{(k)} - \bv^{(k-1)}}^2   - \ltwonorm{\bz^{(k)} - \bm^{(k)}}^2 },
%&\stackrel{(iii)}{\le} (1-\alpha_k) h_{\gamma,c} (\bz^{(k-1)}) + \alpha_k h_{\gamma,c} (\bv^{(k)}) + \frac{\alpha_k^2}{2 s} \pth{ \alpha_k^2 \ltwonorm{\bv^{(k)} - \bv^{(k-1)}}^2   - \ltwonorm{\bz^{(k)} - \bm^{(k)}}^2 }
\eal%
where $\circled{1}$ is due to the fact that $\supp{\bv_{\cC}^{(k)}} =
\supp{\bz_{\cC}^{(k-1)}}$ and $h_{\gamma,c}$ satisfies $h_{\gamma,c}\pth{ (1-\tau)\bu + \tau \bv } \le (1-\tau)h_{\gamma,c}(\bu) + \tau h_{\gamma,c}(\bv)$ for any two vectors $\bu$, $\bv$ with $\supp{\bu_{\cC}} = \supp{\bv_{\cC}}$ and any $\tau \in (0,1)$.
$\circled{2}$ is due to the fact that $\bm^{(k)} - (1-\alpha_k)\bz^{(k-1)}- \alpha_k \bv^{(k)} = \alpha_k (\bv^{(k-1)} - \bv^{(k)})$ according to eq.(9) in Algorithm 1. %Since $\bm^{(k-1)} = \bbP_{\bv^{(k-1)}}(\bz^{(k-1)})$ by (\ref{eq:convex-uk}), $\supp (\bm^{(k-1)}) \subseteq \supp(\bz^{(k-1)}) $, so that $h_{\gamma,c} (\bm^{(k-1)}) \le h_{\gamma,c} (\bz^{(k-1)})$ and $(iii)$ follows.

Computing $\alpha_k \times$ (\ref{eq:convex-theorem-seg3})
+  (\ref{eq:convex-theorem-seg5}), we have
\bal\label{eq:convex-theorem-seg6}
&\langle \nabla f(\bm^{(k)}), \bz^{(k)}- (1-\alpha_k)\bz^{(k-1)} - \alpha_k \bz \rangle + h_{\gamma,c}(\bz^{(k)}) \nonumber \\
&\le (1-\alpha_k) h_{\gamma,c} (\bz^{(k-1)}) + \alpha_k h_{\gamma,c}(\bz) \nonumber \\
&\phantom{=} + \frac{\alpha_k}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 } + \pth{ \frac{\alpha_k^2}{2 s} - \frac{\alpha_k}{2 \lambda_k}} \ltwonorm{\bv^{(k)} - \bv^{(k-1)}}^2
-\frac{1}{2 s}  \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 \nonumber \\
&\stackrel{\circled{1}}{\le} (1-\alpha_k) h_{\gamma,c} (\bz^{(k-1)}) + \alpha_k h_{\gamma,c}(\bz) + \frac{\alpha_k}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 } -\frac{1}{2 s}  \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2,
\eal%
where $\circled{1}$ is due to $\lambda_k \alpha_k \le s$.

Combining (\ref{eq:convex-theorem-seg1}), (\ref{eq:convex-theorem-seg2}) and (\ref{eq:convex-theorem-seg6}), and noting that ${\tilde F}(\bz) = f(\bz) + h_{\gamma,c} (\bz)$, we have
\bal \label{eq:convex-theorem-seg7}
{\tilde F}(\bz^{(k)}) &\le (1-\alpha_k){\tilde F}(\bz^{(k-1)}) + \alpha_k {\tilde F}(\bz) - \pth{ \frac{1}{2 s} - \frac{L_f}{2}} \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 \nonumber \\
&\phantom{=}+ \frac{\alpha_k}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 }.
\eal%

It follows by (\ref{eq:convex-theorem-seg7}) that
\bal \label{eq:convex-theorem-seg8}
{\tilde F}(\bz^{(k)}) - {\tilde F}(\bz) &\le (1-\alpha_k)\pth{ {\tilde F}(\bz^{(k-1)}) -{\tilde F}(\bz) } \nonumber \\
&\phantom{=}- \pth{ \frac{1}{2 s} - \frac{L_f}{2}} \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 + \frac{\alpha_k}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 }.
\eal%

Define a sequence $\{T_k\}_{k=1}^{\infty}$ as $T_1 = 1$, and $T_k = (1 - \alpha_k) T_{k-1}$ for $k \ge 2$. Dividing both sides of (\ref{eq:convex-theorem-seg8}) by $T_k$, we have
\bal \label{eq:convex-theorem-seg9}
\frac{{\tilde F}(\bz^{(k)}) - {\tilde F}(\bz)}{T_k} &\le \frac{ {\tilde F}(\bz^{(k-1)}) -{\tilde F}(\bz) }{T_{k-1}} - \frac{1-L_f s}{2 s T_k} \ltwonorm{\bz^{(k)} - \bm^{(k)}}^2 \nonumber \\
&\phantom{=}+ \frac{\alpha_k}{2 \lambda_k T_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 }.
\eal%

Since we choose $\alpha_k = \frac{2}{k+1}$, it follows that $T_k = \frac{2}{k(k+1)}$ for all $k \ge 1$. Plugging the values of $\alpha_k$ and $T_k$ in $\frac{\alpha_k}{2 \lambda_k T_k}$ in (\ref{eq:convex-theorem-seg9}), we have
\bal \label{eq:convex-theorem-seg10}
\frac{{\tilde F}(\bz^{(k)}) - {\tilde F}(\bz)}{T_k} &\le \frac{ {\tilde F}(\bz^{(k-1)}) -{\tilde F}(\bz) }{T_{k-1}} - \frac{1-L_f s}{2 s T_k} \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 \nonumber \\
&\phantom{=}+ \frac{k}{2 \lambda_k} \pth{ \ltwonorm{\bv^{(k-1)} - \bz}^2 -  \ltwonorm{\bv^{(k)} - \bz}^2 } \nonumber \\
&\stackrel{\circled{1}}{\le} \frac{ {\tilde F}(\bz^{(k-1)}) -{\tilde F}(\bz) }{T_{k-1}} - \frac{1-L_f s}{2 s T_k} \ltwonorm{\bz^{(k)}- \bm^{(k)}}^2 + \frac{k}{2 \lambda_k} \ltwonorm{\bv^{(k-1)} - \bz}^2 \nonumber \\
&\phantom{=}-  \frac{k+1}{2 \lambda_{k+1}} \ltwonorm{\bv^{(k)} - \bz}^2,
\eal%
where $\circled{1}$ is due to the condition that $\lambda_{k+1} \ge \frac{k+1}{k} \lambda_k$ for $k \ge 1$.

Set $k_0 = k'+1$. Summing the above inequality for $k=k_0,\ldots,m$ with $m \ge k_0$, we have
\bal \label{eq:convex-theorem-seg11}
\frac{{\tilde F}(\bz^{(m)}) - {\tilde F}(\bz)}{T_m} &\le \frac{ {\tilde F}(\bz^{(k_0-1)}) -{\tilde F}(\bz) }{T_{k_0-1}} + \frac{k_0\ltwonorm{\bv^{(k_0-1)} - \bz}^2}{2\lambda_{k_0}} - \sum\limits_{k=k_0}^{m} \frac{1-L_f s}{2 s T_k} \ltwonorm{\bz^{(k)} - \bm^{(k)}}^2 \nonumber \\
&\le \frac{k_0(k_0-1)\pth{{\tilde F}(\bz^{(k_0-1)}) -{\tilde F}(\bz)}}{2} + \frac{\ltwonorm{\bv^{(k_0-1)} - \bz}^2}
{2\eta} .
\eal%

Since $T_m= \frac{2}{m(m+1)}$, it follows by (\ref{eq:convex-theorem-seg11}) with $\bz = \bz^*$ that
\bal \label{eq:convex-theorem-seg12}
&{\tilde F}(\bz^{(m)}) - {\tilde F}(\bz^*) \le \frac{1}{m(m+1)} \cdot
\pth{k_0(k_0-1)\pth{{\tilde F}(\bz^{(k_0-1)}) -{\tilde F}(\bz^*)} + \frac{\ltwonorm{\bv^{(k_0-1)} - \bz^*}^2}{\eta}}.
\eal%

Changing $m$ to $k$ in (\ref{eq:convex-theorem-seg12}) completes the proof.



\end{proof}



\begin{figure*}[!hbt]
\begin{center}
\includegraphics[width=1\textwidth]{yaleb_graph_comp.eps}
\end{center}
   \caption{The comparison between the weighted adjacency matrix $W$ of the sparse graph produced by $\ell^{1}$-graph (right) and SRSG (left) on the Extended Yale Face Database B, where each white dot indicates an edge in the sparse graph.
   }
\label{fig:yaleb-W}
\end{figure*}
\section{Additional Illustration}

Figure~\ref{fig:yaleb-W} illustrates the comparison between the weighted adjacency matrices of $\ell^{1}$-graph and SRSG.



% References
\bibliography{ref}
\end{document}
