%%i%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Spherical clusters: center optimization}
\label{sec:proofs}
%%i%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Decomposition of the objective function}
%%ii-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%

\begin{figure}[htb]% or !htb or H
\centerline{ \includegraphics[width=.5\textwidth]{\wgitelandtex/k-subspace/fig/k-subspaces-arrangement/arrangement-3pts-montage.pdf}}
\caption{{\bf Spherical clusters: sinks and associated arrangement on a 2D toy example
with three points $x_1, x_2, x_3$.}
{\bf (Sinks)}  Each circle represents the sink region $B_{x_i}$ of a data point $x_i$.
%%
{\bf (Arrangement)} These circles induce a decomposition of the square
into 2D cells (colored pieces of disks), 1D cells (circle arcs), and 0D cells (black points). }
%% This decomposition is associated with a na Each colored surface is a
%% cell of dimension 2, each circular arc is a cell of dimension 1, and
%% each black and white point represents a cell of dimension 0.
\label{fig:sink-arrangement} 
\end{figure} 

\begin{figure}[htb]% or !htb or H
\begin{tabular}{cc}
\includegraphics[width=6.5cm]{\wgitelandtex/k-subspace/fig/k-subspaces-arrangement/objectivefunction-min-arrangement-3pts.png}&
\includegraphics[width=6.5cm]{\wgitelandtex/k-subspace/fig/k-subspaces-arrangement/objectivefunction-min-arrangement-3pts-bis.png}
\end{tabular}
\caption{{\bf Spherical clusters: function $F_{\eta}$ and local minima
    on two datasets of three points in 2D (orange bullets).}  Contour
  plot of the objective functions, with the spheres that define the
  arrangement, and the result of BFGS optimization (red bullet).  
%%
Note the discontinuities of the tangent space to level set surfaces when crossing
a sphere bounding a sink ball.
%%
{\bf  Left} The minimum is on a cell of dimension $d-1$.  {\bf Right}
  The minimum is in a cell of dimension $d$.}
\label{fig:Feta-mins}
\end{figure}


\begin{proof}[Lemma \ref{lemma:cvxtildef}]
    $\ftildeeta$ is clearly twice differentiable, and its Hessian matrix is
    \begin{equation}
        \mathbb{H}_{\ftildeeta}(c) = 2(1 - \eta \frac{n}{n-1})I_d
    \end{equation}
    where $d$ is the dimension of the ambient space, and $n$ the number of points in $D_\ell$.

    Since a twice differentiable function is convex if and only if its
    Hessian matrix is {\em positive semi-definite} at every point, one
    directly obtains the first result. The second one comes from the
    fact that a twice differentiable function is strictly convex if
    and only if its Hessian matrix is {\em positive definite} at every
    point.
\end{proof}


\begin{proof}[Lemma \ref{lem:sink-geom}]
Using  $f_{\eta,x_i}(c) = \max\left(0, \ftildeeta{c} \right)$ in the scope
of the definition of the sink region $B_{x_i} = f_{\eta,x_i}^{-1}\left(
\{0\} \right) $ yields
\begin{equation}
B_{x_i} = \left\{ c \in \Rd : \|x_i-c\|^2 - \eta \frac{1}{n-1} \sum_{x_j \in D_\ell} \|x_j-c\|^2 \le 0 \right\}
        \label{eqn:reformulationexi}
    \end{equation}

Let  $\eta' := \eta \frac{n}{n-1}$.

Then, denote $c = (c_1,\dots,c_d) \in \Rd$. The condition $c \in B_{x_i}$ rewrites :
    \begin{align*}
        c \in B_{x_i} &\iff \|x_i-c\|^2 - \eta \frac{1}{n-1} \sum_{x_j \in D_\ell} \|x_j-c\|^2 \le 0 \\
        &\iff \|x_i\|^2 + \|c\|^2 - 2\langle x_i,c\rangle - \eta \frac{1}{n-1} \sum_{x_j \in D_\ell} \left( \|x_j\|^2 + \|c\|^2 - 2\langle x_j,c\rangle \right) \le 0 \\
        &\iff (1-\eta')\|c\|^2 - 2\langle x_i - \eta'\bar{x}, c \rangle + \|x_i\|^2 - \frac{\eta'}{n} \sum_{x_j \in D_\ell} \|x_j\|^2 \le 0
    \end{align*}
    where $\bar{x} = \frac{1}{n} \sum_{x_j \in D_\ell} x_j$, and $0 <\eta' = \eta\frac{n}{n-1} < 1$. Then, we have :
    \begin{align*}
        c \in B_{x_i} &\iff (1-\eta')\left[ \|c\|^2 - 2\left\langle \frac{x_i - \eta'\bar{x}}{1-\eta'}, c \right\rangle + \frac{\|x_i\|^2 - \frac{\eta'}{n} \sum_{x_j \in D_\ell} \|x_j\|^2}{1-\eta'} \right] \le 0 \\
        &\iff \left\| c - \frac{x_i - \eta'\bar{x}}{1-\eta'} \right\|^2 - \left\| \frac{x_i - \eta'\bar{x}}{1-\eta'} \right\|^2 + \frac{\|x_i\|^2 - \frac{\eta'}{n} \sum_{x_j \in D_\ell} \|x_j\|^2}{1-\eta'} \le 0 \\
        &\iff \left\| c - \frac{x_i - \eta'\bar{x}}{1-\eta'} \right\|^2 \le \left\| \frac{x_i - \eta'\bar{x}}{1-\eta'} \right\|^2 - \frac{\|x_i\|^2 - \frac{\eta'}{n} \sum_{x_j \in D_\ell} \|x_j\|^2}{1-\eta'}
    \end{align*}
%%
Denote 
\begin{equation*}
R^2 := \left\| \frac{x_i - \eta'\bar{x}}{1-\eta'} \right\|^2 - \frac{\|x_i\|^2 - \frac{\eta'}{n} \sum_{x_j \in D_\ell} \|x_j\|^2}{1-\eta'}.  
\end{equation*}
%%
Then three cases are possible:
    \begin{itemize}
        \item $R^2 > 0$ and $B_{x_i}$ is a ball of $\Rd$ centered at $\frac{x_i - \eta'\bar{x}}{1-\eta'}$ and of radius $R$
        \item $R^2 = 0$ and $B_{x_i} = \left\{\frac{x_i - \eta'\bar{x}}{1-\eta'}\right\}$
        \item $R^2 < 0$ and $B_{x_i} = \emptyset$
    \end{itemize} 
    Looking at the definition of $B_{x_i}$, it is obvious that $x_i \in B_{x_i}$. Therefore, $B_{x_i}$ is a non-empty ball, and the previous equivalences give that its center is :
    \begin{equation}
        c_i := \frac{x_i - \eta' \bar{x}}{1 - \eta'}
    \end{equation}
    Moreover, if $\bar{x} \neq x_i$, we obtain that $B_{x_i}$ is a non-trivial ball of $\Rd$. 
\end{proof}



\begin{proof}[Lemma \ref{lemma:analysissubfunctions}]
    $f_{\eta,x_i}$ is obviously continuous and convex as the pointwise maximum of two continuous and convex functions. The definition of $B_{x_i}$ directly gives the fact that $f_{\eta,x_i} (B_{x_i}) = \{0\}$.

    By definition of $f_{\eta,x_i}$ and $B_{x_i}$, we have 
    \begin{equation*}
        f_{\eta,x_i}|_{\Rd \setminus B_{x_i}} = \ftildeeta |_{\Rd \setminus B_{x_i}}
    \end{equation*}
    and by continuity of both $f_{\eta,x_i}$ and $\ftildeeta$ one obtains
    \begin{equation*}
        f_{\eta,x_i}|_{\overline{\Rd \setminus B_{x_i}}} = \ftildeeta |_{\overline{\Rd \setminus B_{x_i}}}
    \end{equation*}
    $\ftildeeta$ is by definition a quadratic form, and by lemma \ref{lemma:cvxtildef}, we have that $\ftildeeta$ is strictly convex on $\Rd$. Hence, $f_{\eta,x_i}$ is a non-zero quadratic form, and it is strictly convex on every convex subset of $\overline{\Rd \setminus B_{x_i}}$.
\end{proof}


\subsection{Arrangement underlying the objective function}
%%ii-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%-%

%% xfc now in main text
\begin{comment}
\begin{lemma} 
\label{lemma:arrangementforpiecewisequadratic-bis}
The arrangement $\calA$ defined earlier is a partition that verifies the following property:
\begin{equation*}
\forall \calC \in \calA, 
F_\eta|_\calC \text{ is the restriction of a quadratic form}
\end{equation*}
\end{lemma}
\end{comment}

\begin{proof}[Lemma \ref{lemma:arrangementforpiecewisequadratic}]
The fact that $\mathcal{A}$ is a partition of $\Rd$ is obvious :
the union of all cells is $\Rd$ since every point has a signature
and therefore belongs to one of the connected components of its
equivalence class, and we only take in $\mathcal{A}$ the non-empty
classes. Since every point only has one possible signature, it cannot
belong to two distinct classes, so the cells do not intersect each
other.

    Then, take $\calC \in \mathcal{A}$. By definition
    \begin{equation*}
        j \in \supportcell{\calC} \iff \calC \cap B_{x_j} = \emptyset
    \end{equation*}
    Hence, we obtain 
    \begin{equation}
        F_\eta|_\calC = \sum_{j \in \supportcell{\calC}} \ftildeeta[x_j]{c}
        \label{eqn:fetarestricted-bis}
    \end{equation}
    that is, $F_\eta|_\calC$ is the restriction of a quadratic form to $\calC$. 
\end{proof}
\toblack

%% xfc \supportpoint used only once in the proof of the lemma: removed
\begin{comment}
The following definition will be used in the proof of the next lemma:
\begin{definition} \label{def:indices}
    Let $\supportpoint$ be the following application :
    \begin{equation}
        \supportpoint \; : \; \left\lbrace \begin{aligned}
            \Rd &\to \mathcal{P}([\![ 1,n ]\!]) \\
            x &\mapsto \left\{ i \in [\![ 1,n ]\!] : x \notin B_{x_i} \right\}
        \end{aligned} \right.
    \end{equation}
    The set $\supportpoint{x}$, called {\em support} of the point $x$, contains the indices of the non-zero sub-functions $f_{\eta,x_i}$ at $x$.
\end{definition}
\end{comment}

\begin{lemma} [Intersection of a segment with a cell] \label{lemma:intersect}
    Let $S$ be a line segment supported by the line $L$, and $\calC$ be a cell of $\mathcal{A}$. According to the type of cell, the intersection $S \cap \calC$ can take different shapes :
    \begin{itemize}
        \item {\em If $\calC$ is a full-dimensional cell}, the intersection of $S$ and $\calC$ is either empty or a union of open intervals of $S$
        \item {\em If $\calC$ is not a full-dimensional cell}, the intersection of $S$ and $\calC$ is either empty, a singleton, or the union of two singletons
    \end{itemize}
\end{lemma}

\begin{proof}
    {\em First assume that $\calC$ is full-dimensional}, i.e. that it is of dimension $d$ in $\Rd$. Then, its signature has only non-zero values, which gives :
    \begin{equation*}
        \calC = \left( \bigcap_{i \in \supportcell{\calC}} \Rd \setminus B_{x_i} \right) \cap \left( \bigcap_{i \notin \supportcell{\calC}} \overset{\circ}{B_{x_i}} \right)
    \end{equation*}
    Thus, $\calC$ is an open set. Therefore, the intersection is a subset of $S$, open in the topology relative to $S$. Hence, it is either empty, or the union of its connected components, that are open intervals in the topology relative to $S$.

    {\em In the case where $\calC$ is not a full dimensional
      cell}, it has at least one zero in its signature. Therefore, it
    is supported by at least one $(d-1)$-dimensional sphere
    $\mathbb{S}$. Because $S \subset L$ and $\calC \subset
    \mathbb{S}$, one trivially has $S \cap \calC \subset L \cap
    \mathbb{S}$. Then, observe that the intersection of a line and a
    sphere is given by the roots of a degree 2 polynomial.   This
    yields that $S \cap \calC$ contains either zero, one or two
    points.
\end{proof}

\begin{proof}[Theorem \ref{thm:strcvxf}]
    Assume $0 < \eta < 1- \frac{1}{n}$. Let $\mathcal{A}$ be the arrangement defined at equation \ref{eqn:arrangement}, and $\calC$ a cell of $\mathcal{A}$. As stated in lemma \ref{lemma:arrangementforpiecewisequadratic}, $F_\eta$ restricts to a quadratic form on $\calC$ :
    \begin{equation*}
        F_\eta|_\calC = \sum_{j \in \supportcell{\calC}} \ftildeeta[x_j]{c}
    \end{equation*}
    Moreover, we claim that $\supportcell{\calC} \neq \emptyset$. Indeed, for $c \in \calC$, there is a point $x_i$ that verifies 
    \begin{equation*}
        \forall j, \quad \|x_i-c\| \ge \|x_j-c\|
    \end{equation*}
    Then, we obtain 
    \begin{align*}
        \eta \frac{1}{n-1} \sum_{x_j \in D_\ell} \|x_j-c\|^2 &\le \eta \frac{n}{n-1} \|x_i-c\|^2 \\
        &< \|x_i-c\|^2
    \end{align*}
    because $0 <\eta < 1-\frac{1}{n}$ and $x_i \neq c$ since there are at least two distinct points in $D_\ell$ by hypothesis. This yields $c \notin B_{x_i}$.
%% , i.e. $i \in \supportpoint{c}$.
Since $c \in \calC$, this gives $i \in \supportcell{\calC}$, thus it is not empty. With $\calC \subset \bigcap_{j \in \supportcell{\calC}} \Rd \setminus B_{x_j}$, we obtain that $F_\eta|_\calC$ is strictly convex on each convex subset of $\calC$, as a sum of strictly convex functions.

    We have proved that $F_\eta$ is strictly convex on every convex subset of each cell of $\mathcal{A}$. We will now prove that $F_\eta$ is strictly convex on $\Rd$. Take two distinct points $x,y \in \Rd$, and $\lambda \in ]0,1[$. We want to show
    \begin{equation}
        F_\eta((1-\lambda)x+\lambda y) < (1-\lambda)F_\eta(x) + \lambda F_\eta(y)
        \label{eqn:strconvexity}
    \end{equation}
    The two points $x$ and $y$ define a line segment $S$ in $\Rd$. Observe that having the strict convexity on $S$ directly gives the result, and therefore is sufficient to prove the strict convexity on $\Rd$. 
    
Denote $\calC_{i_1}, \dots, \calC_{i_\ell}$ the cells with
non-empty intersection with $S$. For each cell, denote
$\mathcal{U}_{i_l,m}$ the different connected components of the
intersection $\calC_{i_l} \cap S$. For the sake of simplicity,
denote $\mathcal{V}_1, \dots, \mathcal{V}_q$ the connected components
of all considered intersections, sorted in encountering order while
going from $x$ to $y$. The sets $\mathcal{V}_1, \dots, \mathcal{V}_q$ form a
partition of $S$. An example of the segment is given in figure
\ref{fig:segment-in-arrangement}.
    
By lemma \ref{lemma:intersect}, we have that $\mathcal{V}_k$ is either
a singleton, or an open interval of $S$, so each $\mathcal{V}_k$ is a
convex subset of the cell it belongs to. Thus, $F_\eta$ is strictly
convex on each $\mathcal{V}_k$.
    
Recalling that $F_\eta$ is convex on $S$, since we established its
convexity on $\Rd$ as the sum of convex functions at the beginning
of the section, and that it is strictly convex on each set of a finite
partition of $S$, one obtains that it is {\em strictly convex} on
$S$. As stated before, strict convexity on every segment of a
convex set gives the overall strict convexity, and therefore the result
of the theorem.
\end{proof}

\begin{figure}
\begin{tabular}{cc}
\includegraphics[width=6.5cm]{\wgitelandtex/k-subspace/fig/k-subspaces-arrangement/segment-3pts.png}&
\includegraphics[width=6.5cm]{\wgitelandtex/k-subspace/fig/k-subspaces-arrangement/segment-only-3pts.png}\\
%%
Segment in the arrangement & Segment alone
\end{tabular}
\caption{{\bf Two representations of the same segment:} {\bf (Left)} The segment is represented on the arrangement. Note that it crosses the gray cell twice, since cells are not necessarily convex. {\bf (Right)} A representation of the segment, colored according to the cells crossed. Dot-like shapes above some rectangles signal the intersections reduced to a single point, while other rectangles represent open intervals of $S$.}
\label{fig:segment-in-arrangement}
\end{figure}

%%\clearpage
%% xfc now covered by the non smooth optim
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{comment}
\subsection{Computing the optimal center: exact algorithm}
%%\label{sec:exact-center}
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\paragraph{Exact algorithm.}
Exploiting the decomposition of the ambient space defined
by the arrangement of spheres associated with sinks, 
we inspect in turn  the cases of cells of different dimension 
that may host the optimum $c^*$.

The analysis uses the notion of subgradient. Consider a convex
function $f: U \to \Rnt$, where $U$ is an open convex set of $\Rd$. A
subgradient of $f$ at $x_0$ is a vector $g$ such that for all $x \in
U$ one has:
\begin{equation}
\label{eqn:def-subgradient}
f(x) - f(x_0) \ge g \cdot (x-x_0)
\end{equation}

%% As already noticed, In order to find an algorithm, we started with the
%% observation that the arrangement defines a partition of the ambient
%% space. This gives the existence of a cell $\calC$ that contains
%% We start with some results for specific dimensions of these
%% cells, and give an illustration of this in figure
%% \ref{fig:Feta-mins}

\paragraph{$d$-dimensional cell.}
If $\calC$ is a full-dimensional cell, lemma
\ref{lemma:arrangementforpiecewisequadratic} gives that $F_\eta$
restricts  to the quadratic form $F_{\eta,\supportcell{\calC}}$ on
$\calC$, thus that it is differentiable at $c^*$ because $c^*$
is an interior point of $\calC$. Then, one has
\begin{equation}
    \nabla F_{\eta,\supportcell{\calC}} (c) = 0 \iff c = \frac{\bar{x}_{\supportcell{\calC}} - \eta' \bar{x}  }{1 - \eta'}
\end{equation}
which gives us the exact expression of the minimum of the function.

\paragraph{$d-1$-dimensional cell.}
If $\calC$ is of dimension $d-1$, then, it separates two
full-dimensional cells $\calC_1$ and $\calC_2$. We know
that $\calC$ is supported by exactly one sphere of the
arrangement. Denote the point associated to that sphere $x_i$, and
suppose without loss of generality that $\mathcal{J}(\calC_2) =
\mathcal{J}(\calC_1) \cup \{i\}$. For each point $x$ on
$\calC$, $F_\eta$ has two subgradients on $x$ :
\begin{equation*}
    g_1(x) = \nabla F_{\eta,\mathcal{J}(\calC_1)} (x) \quad \text{and} \quad g_2(x) = \nabla F_{\eta,\mathcal{J}(\calC_2)} (x)
\end{equation*}
Since $c^* \in \calC$ is the global minimum of the function, one has $g_1(x) = \alpha g_2(x)$, with $\alpha < 0$.
%
%
Therefore, to find the exact solution, we solve the following equation for $\alpha$
\begin{equation} \label{eqn:syst-gradient-colinearity}
        \nabla F_{\eta,\mathcal{J}(\calC_1)} (x) = \alpha \nabla F_{\eta,\mathcal{J}(\calC_2)} (x)
\end{equation}
Since each $F_{\eta, \mathcal{J}(\calC_i)}$ is a quadratic form,
its gradient is an affine function of $x = (x_1, \dots, x_d)$. Hence,
the equation rewrites as a non-singular linear system of $d$
equations, and $d$ variables.
%
%
Then, we consider the following system, that reformulates the constraint $x \in \calC$:
\begin{equation} \label{eqn:syst-quad-constraints}
    \left\lbrace \begin{aligned}
        & \ftildeeta{x}  = 0 \\
        & \forall j \in \mathcal{J}(\calC_1), \ftildeeta{x}  > 0 \\
        & \forall j \notin \mathcal{J}(\calC_2), \ftildeeta{x}  < 0
    \end{aligned} \right.
\end{equation}
This is a system of $n$ quadratic constraints. Remarkably, after substituting $x$ by the solution of \ref{eqn:syst-gradient-colinearity}, it is an over-constrained system of one variable $\alpha$.
%
%
If there is a solution $\alpha_0$, then there are three cases : either
$\alpha_0$ is negative, null, or it is positive. Obviously, if it is
null, then $g_1 = 0$. Hence, using the linear lower bound on $F_\eta$
given by equation \ref{eqn:def-subgradient}, this gives that $x$ is
the global minimum of $F_\eta$. Then, if $\alpha > 0$, both
subgradients are collinear and point to the same direction. On both
cells $\calC_1$ and $\calC_2$, the function increases in
the direction of the subgradients, and decreases in the opposite
one. The point cannot be the global optimum, which contradicts our
initial assumption. Finally, if $\alpha < 0$, both subgradients are
collinear but in opposite directions. The function increases in every
direction, and the solution to the system is the global optimum of the
function.

\paragraph{Lower dimensional cells.}
If the cell has a dimension that is lower than $d-1$, the strategy mimics the previous one.
That is, one seeks a vanishing linear combination of subgradients.
\end{comment}
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%iii%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\begin{comment}
If the cell has a dimension that is lower than $d-1$, the latter
result has not been generalized yet, and we also leave this as
immediate future work. Nevertheless, we think this could be done using
Clarke subgradients and the definition of a stationary point as a
point that is contained in the convex hull of its subgradients'
ends/heads.
\end{comment}

\begin{comment}
The idea for an algorithm is to use these observations in order to
explore the arrangement. Starting from a point, one uses its signature
to know the cell it belongs to. Then, one looks if there is a solution
in that cell, in which case we have found the global minimum, and if
not one descends the subgradient until one reaches a new cell. At this
point, one looks for a solution on the low dimensional cell that
separates both the old and new cell if there is one, and if not, one
decides whether to go through and continue in the new cell or to
continue on the lower dimensional cell. This idea should improve on
subgradient descent because of the help the arrangement provides, but
the steps needed to check whenever one has escaped the cell or not add
a computational cost and will need a number type supporting exact
predicates~\cite{kettner2008classroom}.  We leave this idea for future
work, but believe it could provide an algorithm to compute the global
minimum. For the moment, we use the BFGS optimizer which, although not
ideal, is functional.
\end{comment}

