% \documentclass{uai2023} % for initial submission
\documentclass{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent

\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)
\usepackage{hyperref}
\usepackage{smile}

\usepackage{xr}
\externaldocument{piwek_789}

\setcounter{section}{5}
\setcounter{figure}{5}


\title{
Exact Count of Boundary Pieces of ReLU Classifiers: \\
Towards the Proper Complexity Measure for Classification}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{Pawe\l~Piwek}
\author[2]{Adam~Klukowski}
\author[2]{\href{mailto:<hutianyang.up@outlook.com>?Subject=Your UAI 2023 paper}{Tianyang~Hu}}
% Add affiliations after the authors
\affil[1]{%
    University of Oxford\\ 
    \texttt{pawel.piwek@maths.ox.ac.uk}
}
\affil[2]{%
    Huawei Noah's Ark Lab\\
    \texttt{hutianyang1@huawei.com}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \theoremstyle{plain}
% \newtheorem{theorem}{Theorem}[section]
% \newtheorem{proposition}[theorem]{Proposition}
% \newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{corollary}[theorem]{Corollary}
% \theoremstyle{definition}
% \newtheorem{definition}[theorem]{Definition}
% \newtheorem{assumption}[theorem]{Assumption}
% \theoremstyle{remark}
% \newtheorem{remark}[theorem]{Remark}

% Todonotes is useful during development; simply uncomment the next line
%    and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}
\usepackage[textsize=tiny]{todonotes}

% Added by adam
\usepackage{svg}
\usepackage[capitalize,noabbrev]{cleveref}

% \newtheorem{proposition}{Proposition}
% \theoremstyle{definition}
% \newtheorem{definition}{Definition}
% \newtheorem{eg}{Example}
% \theoremstyle{remark}
% \newtheorem*{note}{Note}

\renewcommand{\vec}[1]{\mathbf{#1}}
\newcommand{\x}{\vec{x}}
\newcommand{\y}{\vec{y}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\D}{\mathcal{D}}
\newcommand{\U}{\mathcal{U}}
\newcommand{\F}{\mathcal{F}}
\newcommand{\PU}{\mathcal{PU}}
\newcommand{\T}{\mathcal{T}}

\usepackage{xcolor}

\counterwithout{theorem}{section}
\crefname{example}{Example}{Examples}
\crefname{proposition}{Proposition}{Propositions}

\begin{document}

\newpage
\onecolumn
\section*{Appendix}\label{appendix}

\subsection*{Proof of Proposition \ref{CPLs}}

\begin{proof}
    Denote \(\vec{z} = t \x + (1-t) \y\) and assume that at \(\vec{z}\) the \(i\)-th function is largest, i.e. \(F(\vec{z}) = A_i \vec{z} + b_i\). Then
    \begin{align*}
        F(\vec{z}) = t (A_i \x + b_i) + (1 - t) (A_i \y + b_i) \leq t F(\x) + (1 - t) F(\y)
    \end{align*}
\end{proof}

\subsection*{Proof of Proposition \ref{DCPAs}}

\begin{proof}
    The proof is by induction. We need to prove two facts. First, applying a linear function to a vector of DCPAs produces another vector of DCPAs; second, that a maximum of two DCPAs is a DCPA.
    
    Let \(F-G\) be a vector of DCPAs, where \(F\) and \(G\) are vectors of \(n\) CPAs, and let \(A\) be an \(m\times n\) matrix with real coefficients. Write \(A = A_+ - A_-\) where both \(A_+\) and \(A_-\) have non-negative entries. Then we have
    \[A\:(F-G) = (A_+ - A_-)\:(F-G) = (A_+F + A_-G) - (A_-F + A_+G).\]
    This proves the first fact.
    
    The second fact is easy to see from \(\max\{a,b\}+c = \max\{a+c,b+c\}\) and \(\max\{a,\max\{b,c\}\} = \max\{a,b,c\}\).
\end{proof}

\subsection*{Proof of Proposition \ref{duality}}

\begin{proof}
The proof follows the following steps. 
    \begin{enumerate}
        \item Let \(\vec{c} = (\vec{a}, b)\) and \(H : \vec{d} \mapsto \x^\intercal \vec{d} + y\). Then both \(\vec{c} \in H\) and \(\cR(H) \in \cR(\vec{c})\) are equivalent to \(b = \x^\intercal \vec{a} + y\).
        \item A \(k\)-dimensional dual plane \(F\) can be written as an intersection of \(d-k+1\) dual hyperplanes \(\cR^{-1}(\vec{z}_0), \dots, \cR^{-1}(\vec{z}_{d-k})\). A dual point \(\cR^{-1}(f)\) belongs to \(F\) if and only if it is a dual of a real hyperplane \(f\) that contains the real points \(\vec{z}_0, \dots, \vec{z}_{d-k}\). Their affine span is the common plane we are looking for, and what we christen \(\cR(F)\).
        
        It is affinely spanned by \(d-k+1\) points, so its dimension is at most \(d-k\). If it were smaller, we could forget some \(\vec{z}_i\), which would mean that \(F\) was an intersection of \(d-k\) hyperplanes, and had dimension at least \(k+1\).
        \item \(F\) is contained in \(G\) if and only if for any hyperplane \(H\) we have \[G \subseteq H \Rightarrow F \subseteq H\] This happens precisely when for all points \(\vec{z} = \cR(H)\) we have \[\vec{z} \in \cR(G) \Rightarrow \vec{z} \in \cR(F)\] that is \(\cR(G) \subseteq \cR(F)\).
        \item Let \(f : \x \mapsto \vec{a}^\intercal \x + b\). Then \(p(\cR^{-1}(f)) = \vec{a}\), which is perpendicular to surfaces \(\vec{a}^\intercal \x = \text{const.}\)
        \item Let \(\vec{c} = (\vec{a}, b)\) and \(H : \vec{d} \mapsto \x^\intercal \vec{d} + y\). Then both \(\vec{c} \succ H\) and \(\cR(\vec{c}) \succ \cR(H)\) are equivalent to \(b > \vec{x}^\intercal \vec{a} + y\).
        \item Suppose \(\vec{c} = (\vec{a}, b),  \vec{c}' = (\vec{a}, b + \Delta)\), and denote \(f : \x \mapsto \vec{a}^\intercal \x + b\). Then \(\cR(\vec{c}) = f, \cR(\vec{c}') = f + \Delta\) -- these functions differ by a constant, so specify parallel planes. The proof for \(\cR^{-1}\) is analogous.
    \end{enumerate}
\end{proof}

\subsection*{Proof of Proposition \ref{max-hull}}

\begin{proof}
    Firstly, let us compare the planes dual to two points, \(\vec{s}_1\) and \(\vec{s}_2\), such that \(\vec{s}_1\) lies directly above \(\vec{s}_2\). This means that they differ only at the very last coordinate---let's say that \(\vec{s}_1 = (\vec{a}_1,b_1)\) and \(\vec{s}_2 = (\vec{a}_2,b_2)\) where \(\vec{a}_1 = \vec{a}_2\) and \(b_1 \ge b_2\). Then the dual planes \(\cR(\vec{s}_1)\) and \(\cR(\vec{s}_2)\) are precisely
    \[\cR(\vec{s}_1) = \{(\vec{x},y_1) | y_1 = (\vec{a}_1)^\intercal \x + b_1\}, \] 
    \[\cR(\vec{s}_2) = \{(\vec{x},y_2) | y_2 = (\vec{a}_2)^\intercal \x + b_2\},\]
    and since \((\vec{a}_1)^\intercal\x + b_1 \geq (\vec{a}_2)^\intercal\x + b_2\) for all \(\x \in \R^d\), the plane \(\cR(\vec{s}_1)\) lies above \(\cR(\vec{s}_2)\).
    
    Secondly, let us consider a point \(\vec{s}\) in the dual space lying on a segment whose endpoints are \(\vec{s}_1\) and \(\vec{s}_2\). Then for some \(p \in [0,1]\) we have \(\vec{s} = p\cdot \vec{s}_1 + (1-p)\cdot \vec{s}_2\) and thus
    \[(\vec{s})^\intercal \begin{bmatrix} \x \\ 1\end{bmatrix} = p\cdot \Bigg((\vec{s}_1)^\intercal \begin{bmatrix} \x \\1 \end{bmatrix}\Bigg) + (1-p)\cdot \Bigg((\vec{s}_2)^\intercal \begin{bmatrix} \x \\ 1 \end{bmatrix}\Bigg),\]
    so, in particular,
    \[(\vec{s})^\intercal \begin{bmatrix} \x \\ 1\end{bmatrix} \leq \max \Bigg\{(\vec{s}_1)^\intercal \begin{bmatrix} \x \\1 \end{bmatrix},\quad (\vec{s}_2)^\intercal \begin{bmatrix} \x \\ 1 \end{bmatrix}\Bigg\}.\]
    
    Thirdly, we want to piece the two together. For a point \(\vec{s}_2\) lying below \(\U(S)\), let us choose a point \(\vec{s}_1\in \U(S)\) lying exactly above \(\vec{s}_2\). The plane defined by it lies above the one defined by \(\vec{s}_2\) according to the first paragraph. Now we only need to show that points on \(\U(S)\) define planes lying below the maximum, but this follows from the second paragraph and the fact that all points on a convex hull of a finite set of points can be generated by taking segments whose ends lie in the hull and adding all of the points of the segment to the hull.
\end{proof}

\subsection*{Proof of Proposition \ref{basic-properties}}

\begin{proof}
    This is a straightforward consequence of the more elementary identities for scalar \(a, b\): after reducing to upper hulls we have
    \begin{align}
        (a + b) X &= (a X) \oplus (b X) \label{apdx:scalar_right_distributivity}\\
        a (X \oplus Y) &= (a X) \oplus (a Y) \\
        (a b) X &= a (b X) \\
        a (X \cup Y) &= (a X) \cup (a Y)
    \end{align}
    Except for~\eqref{apdx:scalar_right_distributivity}, all of these hold even before taking the hull. To deal with this one, note that
    \begin{align*}
        (a + b) X &= \{ ax + bx | x \in X \} \\
        &\subseteq \{ ax_1 + bx_2 | x_1, x_2 \in X \} = (aX) \oplus (bX)
    \end{align*}
    so we have \(\U((a + b) X) \subseteq \U \big( (a X) \oplus (b X) \big)\). To see the reverse inclusion, write
    \begin{equation*}
        a x_1 + b x_2 = \tfrac{a}{a+b} (a+b) x_1 + \tfrac{b}{a+b} (a+b) x_2
    \end{equation*}
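    which, for \(a, b \geq 0\) with \(a + b > 0\), expresses \(a x_1 + b x_2\) as a convex combination of two points of \((a+b) X\). This means that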
    \begin{equation*}
        (a X) \oplus (b X) \subseteq \U \big( (a+b) X \big)
    \end{equation*}
\end{proof}

\subsection*{Proof of Proposition \ref{explicit}}

\begin{proof}
    Firstly, let us note that
    \begin{align*}
        A_l\: F_{l-1} &= (A_l^+ - A_l^-)\: \big(\cR(P_{l-1}) - \cR(N_{l-1})\big) \\
     &= \big(A_l^+\: \cR(P_{l-1}) + A_l^-\: \cR(N_{l-1})\big) \\
     &\quad - \big(A_l^-\: \cR(P_{l-1}) + A_l^+\: \cR(N_{l-1})\big) \\
     &= \cR\big((A_l^+\otimes P_{l-1}) \oplus (A_l^- \otimes N_{l-1})\big) \\
     &\quad - \cR\big((A_l^-\otimes P_{l-1}) \oplus (A_l^+ \otimes N_{l-1})\big).
    \end{align*}
    Now, we use the fact that \(\max\{x-y,0\} = \max\{x,y\}-y\) to get that for \(N_l = (A_l^-\otimes P_{l-1}) \oplus (A_l^+ \otimes N_{l-1})\), we have
    \begin{align*}
        &\sigma(A_l\: F_{l-1}) \\
        &= \max\{\cR\big((A_l^+\otimes P_{l-1}) \oplus (A_l^- \otimes N_{l-1})\big), \cR(N_l)\} - \cR(N_l)\\
        &=\cR\big((A_l^+\otimes P_{l-1}) \oplus (A_l^- \otimes N_{l-1}) \cup N_l\big) - \cR(N_l),
    \end{align*}
    and thus, for \(P_l = (A_l^+\otimes P_{l-1}) \oplus (A_l^- \otimes N_{l-1}) \cup N_l\), we get
    \[F_l = \sigma(A_l F_{l-1}) = \cR(P_l) - \cR(N_l).\]
\end{proof}

\subsection*{Proof of Proposition \ref{scary}}

\begin{proof}
    A \(k\)-cell of \(\T(S)\) is the region defined by the system
    \begin{align}
        f_{i_0} (\x) &= \dots = f_{i_{d-k}} (\x) \label{decision_boundary_linear}\\
        f_{i_0} (\x) &\geq f_j(\x) \text{ for } j \neq i_0, \dots, i_{d-k} \nonumber
    \end{align}
    This can be written as \[(\x, y) \in f_{i_0}, \dots, f_{i_{d-k}} \qquad (\x, y) \succcurlyeq f_j\] In dual space this becomes 
    \[\cR^{-1}((\x, y)) \ni \cR^{-1}(f_{i_0}), \dots, \cR^{-1}(f_{i_{d-k}}) \]
    \[\cR^{-1}((\x, y)) \succcurlyeq \cR^{-1}(f_j)\] Therefore, the duals of points of the \(k\)-cell are precisely the dual planes containing the \((d-k)\)-cell on vertices \(\cR^{-1}(f_{i_0}), \dots, \cR^{-1} (f_{i_{d-k}})\) and tangent to the upper convex hull.
\end{proof}

\subsection*{Proof of Proposition \ref{new-easy}}

\begin{proof}
    A cell of \(\T(P \cup N)\) is a boundary cell if and only if, in equation~\eqref{decision_boundary_linear}, we have both some function \(f_i \in \cR(P)\) and some function \(g_j \in \cR(N)\). This happens exactly when the dual cell has some vertex \(\cR^{-1}(f_i) \in P\) as well as some vertex \(\cR^{-1}(g_j) \in N\).
\end{proof}

\subsection*{Proof of Proposition \ref{new-hard}}

\begin{proof}
    Again, as before, we need to identify those linear pieces of \(\max\{F,G\}\), which lie on the linear pieces of \(F\) and of \(G\). However, this means identifying cells of \(\U(P \cup N)\) which \emph{contain} a cell of \(\U(P)\) and a cell of \(\U(N)\) (this is due to the duality reversing containment of hyperplanes; we mean set-wise containment here, not containment as subcells).
\end{proof}

\subsection*{Proof of Proposition \ref{DCPL_affine_pieces}}

\begin{proof}
    A \(k\)-dimensional cell \(\sigma\) is the set of \(\x\) satisfying the system
    \begin{align*}
        f_{i_0} (\x) &= \dots = f_{i_a} (\x) = s > f_{i'} (\x) \\
        g_{j_0} (\x) &= \dots = g_{j_b} (\x) = t > g_{j'} (\x)
    \end{align*}
    where \(a + b = d - k\). This can be expressed as relations in the real space
    \begin{align*}
        (\x, s) &\in f_{i_0}, \dots, f_{i_a} \qquad (\x, s) \succ f_{i'} \\
        (\x, t) &\in g_{j_0}, \dots, g_{j_b} \qquad (\x, t) \succ g_{j'}
    \end{align*}
    After passing to the dual space this becomes
    \begin{align}
        \cR^{-1} \big( (\x, s) \big) &\ni \cR^{-1} (f_{i_0}), \dots, \cR^{-1} (f_{i_a}) \label{eqn:pts_on_face_f} \\
        \cR^{-1} \big( (\x, s) \big) &\succ \cR^{-1} (f_{i'}) \label{eqn:pts_below_face_f} \\
        \cR^{-1} \big( (\x, t) \big) &\ni \cR^{-1} (g_{j_0}), \dots, \cR^{-1} (g_{j_b}) \label{eqn:pts_on_face_g} \\
        \cR^{-1} \big( (\x, t) \big) &\succ \cR^{-1} (g_{j'}) \label{eqn:pts_below_face_g}
    \end{align}
    We know that \(\cR^{-1} \big( (\x, s) \big) \) and \(\cR^{-1} \big( (\x, t) \big) \) are a pair of parallel hyperplanes; the former is tangent to \(\U(P)\) (\ref{eqn:pts_below_face_f}) and contains its \(a\)-cell (\ref{eqn:pts_on_face_f}), while the latter is tangent to \(\U(N)\) (\ref{eqn:pts_below_face_g}) and contains its \(b\)-cell (\ref{eqn:pts_on_face_g}).
    
    View these hyperplanes as subsets of \(\mathbb{R}^{d+1}\) and consider their Minkowski sum \(\cR^{-1} \big( (\x, s) \big) \oplus \cR^{-1} \big( (\x, t) \big)\). It is straightforward to verify that it equals the hyperplane \(\cR^{-1} \big( (\x, s + t) \big)\). Since the relation \(\succ\) of lying above is preserved by translations, we have
    \begin{equation*}
        \cR^{-1} \big( (\x, s + t) \big) = \cR^{-1} \big( (\x, s) \big) \oplus \cR^{-1} \big( (\x, t) \big) \succcurlyeq \cR^{-1} (f_i) + \cR^{-1} (g_j) \qquad\qquad \text{for all } \cR^{-1} (f_i) \in P, \cR^{-1} (g_j) \in N
    \end{equation*}
    This means that the plane \(\cR^{-1} \big( (\x, s + t) \big)\) is tangent to \(\U(P \oplus N)\). Also, it contains the \((a + b = d - k)\)-cell \(\sigma'\) on vertices
    \begin{equation}\label{eqn:cell_in_minkowski_sum_PN}
        \left\{ \cR^{-1}(f_{i_\alpha}) + \cR^{-1}(g_{j_\beta}) \ | \ 0 \leq \alpha \leq a, 0 \leq \beta \leq b \right\}
    \end{equation}

    Conversely, suppose a hyperplane \(H\) is tangent to \(\U(P \oplus N)\) and contains the \((d-k)\)-cell \(\sigma'\) on the vertices from equation~\eqref{eqn:cell_in_minkowski_sum_PN}. Let \(\x = p(H)\) be the vector of linear coefficients of \(H\). If we had \(f_{i'} (\x) > f_{i_\alpha} (\x)\) for any \(i' \notin \{ i_0, \dots, i_a \} \ni i_\alpha\), then the point \(\cR^{-1} (f_{i'}) + \cR^{-1} (g_{j_0})\) would lie above \(H\), which is impossible. Therefore we must have
    \begin{equation}
        f_{i_0} (\x) = \dots = f_{i_a} (\x) > f_{i'} (\x)
    \end{equation}
    and a similar set of conditions involving \(g\)'s. This means that \(\x = p(H)\) lies in the real cell \(\sigma\).

    These functions are mutually inverse, and hence provide a bijection between real points of \(\sigma\) and dual tangent hyperplanes containing \(\sigma'\).

    Since every point of the real space belongs to a unique cell, and every dual hyperplane tangent to \(\U (P \oplus N)\) intersects it in a unique cell, the assignment \(\sigma \leftrightarrow \sigma'\) is bijective.
\end{proof}
\begin{remark}
    The sign of the function on the cell (equivalently, the class to which the region belongs) depends on which of \(\cR^{-1} \big( (\x, s) \big), \cR^{-1} \big( (\x, t) \big)\) lies above the other.
\end{remark}

\subsection*{Numerical Experiments Details} \label{app:exp}
The neural networks are initialized by the default Uniform distribution\footnote{The default weight initialization in \texttt{torch.nn.Linear} is uniform on $[-\sqrt{1/N}, \sqrt{1/N}]$ where $N$ is the width.}. 
For all ReLU neural networks, the optimization is done by stochastic gradient descent with learning rate$=0.1$, momentum$=0.9$ and weight decay$=0.001$ (if not specified otherwise). 

\paragraph{2D spiral}
The synthetic spiral data is from the two-dimensional distribution $P = (\rho \sin \theta + 0.04,  \rho \cos \theta)$ where $\rho = {(\theta/4 \pi)}^{4/5} + \epsilon$ with selected $\theta$ from $(0, 4\pi]$ and $\epsilon \sim \text{unif}([-0.03,0.03])$. We draw 300 positive and 300 negative training samples from $-P$ and $P$, respectively, with a random seed fixed for every run. Both the Gaussian noise injection strength and the adversarial training strength are set at $0.01$. 

\paragraph{2D Gaussian mixture}
There are $3\times 3$ mixing components, each of which is an isotropic Gaussian with standard deviation $\sigma=0.1$. The means are grid points from $\{-1, 0, 1\} \times \{-1, 0, 1\}$. The mixing weight is equal for all components.
Both the Gaussian noise injection strength and the adversarial training strength are set at $0.1$. 

% \paragraph{MNIST Dataset} 
% The MNIST image is of size $28\times 28$, each element is originally a grey scale from 0 to 255 and normalized to be between $0$ and $1$. Before passing into ReLU network, the image is flattened into $1\times 784$. 
% Both the Gaussian noise injection strength and the adversarial training strength are set at $8/255$. 
Below we show some training trends for CE, Noisy and Adv in the Gaussian mixture case. It is worth noting that all trend plots in this work, including Figures~\ref{fig:trend} and~\ref{fig:trend2}, are smoothed with moving averages. 

\begin{figure}[ht]
    \centering
     \includegraphics[width=0.5\textwidth
    ]{figures/trend_ce1}
    
    \caption{CE training trends of \#Boundary (red), \#Total (green), F-norm (red) vs. iteration in the Gaussian mixture case.}
  
\end{figure}

\begin{figure}[ht]
    \centering
     \includegraphics[width=0.5\textwidth
    ]{figures/trend_noisy}
   
    \caption{Noisy training trends of \#Boundary (red), \#Total (green), F-norm (red) vs. iteration in the Gaussian mixture case.}
    
\end{figure}

\begin{figure}[!ht]
    \centering
     \includegraphics[width=0.5\textwidth
    ]{figures/trend_adv}
   
    \caption{Adv training trends of \#Boundary (red), \#Total (green), F-norm (red) vs. iteration in the Gaussian mixture case.}
    
\end{figure}

% \begin{figure}[H]
% \caption{Training trends of \#Boundary (red), \#Total (green), F-norm (red) vs. iteration in the Gaussian mixture case. From left to right are CE, Noisy and Adv, respectively. }
%     \centering
%     \subfigure{
%       \includegraphics[width=0.32\textwidth]{figures/trend_ce1.pdf}}\vspace{-1cm}
%     \subfigure{
%       \includegraphics[width=0.32\textwidth]{figures/trend_noisy}}\vspace{-1cm}
%     \subfigure{
%       \includegraphics[width=0.32\textwidth]{figures/trend_adv.pdf}}
%     \vspace{-3mm}
    
%     \label{app_fig:sep}
% \end{figure}

\subsection*{An example of computation with Proposition~\ref{explicit}}

First we should note that
in a standard ReLU network the transition functions
are arbitrary affine functions,
but we can introduce a `dummy dimension' to realise these
as \emph{linear} functions.

We will consider a very simple network
with two-dimensional input,
one hidden layer with three neurons,
and the following transition matrices
(with the dummy dimension included).
For illustrative purposes we assume that
ReLU is applied also at the last layer.

\begin{equation*}
    A_1 = \begin{bmatrix}
        1   & -0.5  & 4  \\
        -2  & 1     & 0  \\
        3   & 3     & -1 \\
        0   & 0     & 1
    \end{bmatrix},\quad
    A_2 = \begin{bmatrix}
        0.5 & -1    & -0.5 & 2 \\
        0   & 0     & 0    & 1
    \end{bmatrix}
\end{equation*}

The input function \(F_0 = (x, y, 1)\)
(where the last coordinate is a dummy)
is decomposed into \(\cR(P_0) - \cR(N_0)\)
with
\begin{equation*}
    P_0 = \begin{pmatrix}
        \{(1,0,0)\} \\
        \{(0,1,0)\} \\
        \{(0,0,1)\}
    \end{pmatrix},\quad
    N_0 = \begin{pmatrix}
        \{(0,0,0)\} \\
        \{(0,0,0)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}.
\end{equation*}

To compute \(P_1\) and \(N_1\),
we need to decompose the matrix \(A_1\)
into its positive and negative parts \(A_1^+\) and \(A_1^-\).
\begin{align*}
    N_1 & = (A_1^+ \otimes N_0) \oplus (A_1^- \otimes P_0) \\
        & =
        \left(\begin{bmatrix}
            1   & 0     & 4  \\
            0   & 1     & 0  \\
            3   & 3     & 0  \\
            0   & 0     & 1
        \end{bmatrix}
        \otimes
        \begin{pmatrix}
            \{(0,0,0)\} \\
            \{(0,0,0)\} \\
            \{(0,0,0)\} \\
        \end{pmatrix}
        \right)\oplus\left(
        \begin{bmatrix}
            0   & 0.5   & 0  \\
            2   & 0     & 0  \\
            0   & 0     & 1 \\
            0   & 0     & 0
        \end{bmatrix}
        \otimes
        \begin{pmatrix}
            \{(1,0,0)\} \\
            \{(0,1,0)\} \\
            \{(0,0,1)\}
        \end{pmatrix}\right)       \\
        & =
        \begin{pmatrix}
            1 \{(0,0,0)\} \oplus 0 \{(0,0,0)\}
            \oplus 4 \{(0,0,0)\} \\
            0 \{(0,0,0)\} \oplus 1 \{(0,0,0)\}
            \oplus 0 \{(0,0,0)\} \\
            3 \{(0,0,0)\} \oplus 3 \{(0,0,0)\}
            \oplus 0 \{(0,0,0)\} \\
            0 \{(0,0,0)\} \oplus 0 \{(0,0,0)\}
            \oplus 1 \{(0,0,0)\} \\
        \end{pmatrix}
        \oplus
        \begin{pmatrix}
            0 \{(1,0,0)\} \oplus 0.5 \{(0,1,0)\}
            \oplus 0 \{(0,0,1)\} \\
            2 \{(1,0,0)\} \oplus 0 \{(0,1,0)\}
            \oplus 0 \{(0,0,1)\} \\
            0 \{(1,0,0)\} \oplus 0 \{(0,1,0)\}
            \oplus 1 \{(0,0,1)\} \\
            0 \{(1,0,0)\} \oplus 0 \{(0,1,0)\}
            \oplus 0 \{(0,0,1)\} \\
        \end{pmatrix}               \\
        & =
        \begin{pmatrix}
            \{(0,0,0)\} \\
            \{(0,0,0)\} \\
            \{(0,0,0)\} \\
            \{(0,0,0)\} \\
        \end{pmatrix}
        \oplus
        \begin{pmatrix}
            \{(0,0.5,0)\} \\
            \{(2,0,0)\} \\
            \{(0,0,1)\} \\
            \{(0,0,0)\} \\
        \end{pmatrix}
        =
        \begin{pmatrix}
            \{(0,0.5,0)\} \\
            \{(2,0,0)\} \\
            \{(0,0,1)\} \\
            \{(0,0,0)\} \\
        \end{pmatrix}           \\
    P_1 & = (A_1^+ \otimes P_0) \oplus (A_1^- \otimes N_0) \cup N_1 \\
    & =
    \begin{pmatrix}
        \{(1,0,4)\} \\
        \{(0,1,0)\} \\
        \{(3,3,0)\} \\
        \{(0,0,1)\} \\
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        \{(0,0,0)\} \\
        \{(0,0,0)\} \\
        \{(0,0,0)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    \cup
    \begin{pmatrix}
        \{(0,0.5,0)\} \\
        \{(2,0,0)\} \\
        \{(0,0,1)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}           \\
    & =
    \begin{pmatrix}
        \{(1,0,4)\} \\
        \{(0,1,0)\} \\
        \{(3,3,0)\} \\
        \{(0,0,1)\} \\
    \end{pmatrix}
    \cup
    \begin{pmatrix}
        \{(0,0.5,0)\} \\
        \{(2,0,0)\} \\
        \{(0,0,1)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    =
    \begin{pmatrix}
        \{(1,0,4), (0, 0.5, 0)\} \\
        \{(0,1,0), (2, 0, 0)\} \\
        \{(3,3,0), (0, 0, 1)\} \\
        \{(0,0,0), (0,0,1)\} \\
    \end{pmatrix}
    \substack{=}_{\U^*}
    \begin{pmatrix}
        \{(1,0,4), (0, 0.5, 0)\} \\
        \{(0,1,0), (2, 0, 0)\} \\
        \{(3,3,0), (0, 0, 1)\} \\
        \{(0,0,1)\} \\
    \end{pmatrix}
\end{align*}
The last operation
is reducing to the upper hull vertices
and it doesn't change the dual function \(\cR(P_1)\).

We repeat this calculation for the next layer.

\begin{align*}
    N_2 & = (A_2^+ \otimes N_1) \oplus (A_2^- \otimes P_1)    \\
    & =
    \left(
    \begin{bmatrix}
        0.5 & 0    & 0 & 2 \\
        0   & 0     & 0    & 1
    \end{bmatrix}
    \otimes
    \begin{pmatrix}
        \{(0,0.5,0)\} \\
        \{(2,0,0)\} \\
        \{(0,0,1)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    \right)
    \oplus
    \left(
    \begin{bmatrix}
        0 & 1    & 0.5 & 0 \\
        0   & 0     & 0    & 0
    \end{bmatrix}
    \otimes
    \begin{pmatrix}
        \{(1,0,4), (0, 0.5, 0)\} \\
        \{(0,1,0), (2, 0, 0)\} \\
        \{(3,3,0), (0, 0, 1)\} \\
        \{(0,0,1)\} \\
    \end{pmatrix}
    \right)                      \\
    & =
    \begin{pmatrix}
        0.5\{(0,0.5,0)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        1\{(0,1,0), (2, 0, 0)\} \oplus 0.5\{(3,3,0), (0, 0, 1)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}                        \\
    & =
    \begin{pmatrix}
        \{(0,0.25,0)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        \{(0,1,0), (2, 0, 0)\} \oplus \{(1.5,1.5,0), (0, 0, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}                         \\
    & =
    \begin{pmatrix}
        \{(0,0.25,0)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        \{(1.5, 2.5, 0), (0,1,0.5), (3.5, 1.5, 0), (2, 0, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}                                   \\
    & =
    \begin{pmatrix}
        \{(1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix} \\
    P_2 & = (A_2^+ \otimes P_1) \oplus (A_2^- \otimes N_1) \cup N_2 \\
    & =
    \left(
    \begin{bmatrix}
        0.5 & 0    & 0 & 2 \\
        0   & 0     & 0    & 1
    \end{bmatrix}
    \otimes
    \begin{pmatrix}
        \{(1,0,4), (0, 0.5, 0)\} \\
        \{(0,1,0), (2, 0, 0)\} \\
        \{(3,3,0), (0, 0, 1)\} \\
        \{(0,0,1)\} \\
    \end{pmatrix}
    \right)
    \oplus
    \left(
    \begin{bmatrix}
        0 & 1    & 0.5 & 0 \\
        0   & 0     & 0    & 0
    \end{bmatrix}
    \otimes
    \begin{pmatrix}
        \{(0,0.5,0)\} \\
        \{(2,0,0)\} \\
        \{(0,0,1)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix}
    \right)
    \\&\quad\cup
    \begin{pmatrix}
        \{(1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix} \\
    & =
    \begin{pmatrix}
        0.5\{(1,0,4), (0, 0.5, 0)\} \oplus 2\{(0,0,1)\} \\
        1 \{(0,0,1)\}
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        1\{(2,0,0)\} \oplus 0.5\{(0,0,1)\} \\
        \{(0,0,0)\}
    \end{pmatrix}
    \\&\quad\cup
    \begin{pmatrix}
        \{(1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix} \\
    & =
    \begin{pmatrix}
        \{(0.5,0,2), (0, 0.25, 0)\} \oplus \{(0,0,2)\} \\
        \{(0,0,1)\}
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        \{(2,0,0)\} \oplus \{(0,0,0.5)\} \\
        \{(0,0,0)\}
    \end{pmatrix}
    \\&\quad\cup
    \begin{pmatrix}
        \{(1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix} \\
    & =
    \begin{pmatrix}
        \{(0.5,0,4), (0, 0.25, 2)\} \\
        \{(0,0,1)\}
    \end{pmatrix}
    \oplus
    \begin{pmatrix}
        \{(2,0,0.5)\} \\
        \{(0,0,0)\}
    \end{pmatrix}
    \\&\quad\cup
    \begin{pmatrix}
        \{(1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix} \\
    & =
    \begin{pmatrix}
        \{(2.5,0,4.5), (2, 0.25, 2.5)\} \\
        \{(0,0,1)\}
    \end{pmatrix}
    \\&\quad\cup
    \begin{pmatrix}
        \{(1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,0)\} \\
    \end{pmatrix} \\
    & =
    \begin{pmatrix}
        \{(2.5,0,4.5), (2, 0.25, 2.5), (1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0), (2, 0.25, 0.5)\} \\
        \{(0,0,1), (0,0,0)\} \\
    \end{pmatrix}
\end{align*}

Now, to reduce the result to the upper hull vertices,
we can note that
\[\frac{1}{5}(0,1.25,0.5)
+ \frac{4}{5}(2.5,0,4.5)
= (0,0.25, 0.1)  + (2, 0, 3.6) = (2, 0.25, 3.7) \succ (2, 0.25, 2.5), (2, 0.25, 0.5),\]
so the two points of the right hand side can be dropped without changing the upper hull.
This gives
\begin{equation*}
    P_2 \substack{=}_{\U^*}
    \begin{pmatrix}
        \{(2.5,0,4.5), (1.5, 2.75, 0), (0,1.25,0.5), (3.5, 1.75, 0)\} \\
        \{(0,0,1)\}
    \end{pmatrix}.
\end{equation*}

Finally, let's recover the representation as a DCPA function.

\begin{align*}
    F_2(x, y) & = \left(\cR(P_2) - \cR(N_2)\right)(x, y)  \\
    & = \max\{1.5x + 2.75y, 1.25y + 0.5, 3.5x + 1.75y, 2.5x+4.5\} \\
    & - \max\{1.5x+2.75y, 1.25y + 0.5, 3.5x + 1.75y, 2x + 0.25y + 0.5\}
\end{align*}


\subsection*{Examples of application of Proposition~\ref{new-hard} and Corollary~\ref{corr_affine_pc_count}}\label{apdx:DCPA_counting_examples}

\paragraph{One-dimensional example} Consider
\begin{align*}
    & f_1 (x) = -\tfrac{1}{2} x - \tfrac{3}{2} \qquad & f_2 (x) =& \tfrac{1}{2} x + \tfrac{1}{2} \qquad & f_3 (x) = 2 x + 1 \\
    & g_1 (x) = 0 \qquad & g_2 (x) =& 2 x \qquad & g_3 (x) = 3 x - 1
\end{align*}

The DCPA function \(F (x) = \max \{ f_1(x), f_2(x), f_3(x) \} - \max \{ g_1(x), g_2(x), g_3(x) \}\) is plotted in figure \ref{subfig:1d_example_plot}. It has 5 affine regions and 3 zeros.

It is represented by dual points as
\begin{align*}
    \max \{ f_1, f_2, f_3 \} = \mathcal{R} (P), \qquad P = \left\{ \begin{pmatrix} -\tfrac{1}{2} \\ -\tfrac{3}{2} \end{pmatrix}, \begin{pmatrix} \tfrac{1}{2} \\ \tfrac{1}{2} \end{pmatrix}, \begin{pmatrix} 2 \\ 1 \end{pmatrix} \right\} \\
    \max \{ g_1, g_2, g_3 \} = \mathcal{R} (N), \qquad N = \left\{ \begin{pmatrix} 0 \\ 0 \end{pmatrix}, \begin{pmatrix} 2 \\ 0 \end{pmatrix}, \begin{pmatrix} 3 \\ -1 \end{pmatrix} \right\}
\end{align*}
Their upper convex hull \(\U (P \cup N)\) is shown on figure \ref{subfig:1d_example_dual_points}. As predicted by proposition \ref{new-hard}, the zero set of \(F\) is in bijection with 1-cells of \(\U (P \cup N)\) which join a point of \(P\) with a point of \(N\). This bijection is shown explicitly in table \ref{subfig:1d_example_cells_to_zeros}. The \(x\)-coordinates of zeros of \(F\) are given by negative slopes of these 1-cells.

The hull of the Minkowski sum \(P \oplus N\) is shown in figure \ref{subfig:1d_example_dual_sum}. In agreement with corollary \ref{corr_affine_pc_count}, there are 5 vertices on \(\U (P \oplus N)\). The explicit bijections between the vertices of \(\U (P \oplus N)\) and affine regions of \(F\), and between tangents at each vertex and points of the corresponding linear region, are given in table \ref{subfig:1d_example_tangents_to_points}.

\paragraph{Two-dimensional example} Take
\begin{align*}
    & f_1 = - x + y + 4 & \qquad & f_2 = x + y -2 & \qquad f_3 = - 2 x - y - 1 \\
    & g_1 = 0 & \qquad & g_2 = 2 x - y + 2 & \qquad g_3 = - x + 2 y + 2
\end{align*}
which correspond to dual points
\begin{equation*}
    P = \left\{ \begin{pmatrix} -1 \\ 1 \\ 4 \end{pmatrix}, \begin{pmatrix} 1 \\ 1 \\ -2 \end{pmatrix}, \begin{pmatrix} -2 \\ -1 \\ -1 \end{pmatrix} \right\}, \qquad N = \left\{ \begin{pmatrix} 0 \\ 0 \\ 0 \end{pmatrix}, \begin{pmatrix} 2 \\ -1 \\ 2 \end{pmatrix}, \begin{pmatrix} -1 \\ 2 \\ 2 \end{pmatrix} \right\}
\end{equation*}
The function \(F = \max \{ f_1, f_2, f_3 \} - \max \{ g_1, g_2, g_3 \}\) is shown on figure \ref{subfig:2d_example_plot}. There are 7 affine regions and 6 boundary pieces.

The configuration of dual points \(P \cup N\) is shown on figure \ref{subfig:2d_example_dual_points}. The upper convex hull \(\U (P \cup N)\) contains 4 faces, 8 edges and 5 vertices. As predicted by proposition \ref{new-hard}, edges joining a point of \(P\) with a point of \(N\) correspond precisely to those affine regions of \(F\) which contain a boundary piece. Explicitly, these are \(f_1 - g_2, f_1 - g_3, f_2 - g_2, f_2 - g_3, f_3 - g_2, f_3 - g_3\).

The Minkowski sum \(P \oplus N\) is shown in figure \ref{subfig:2d_example_dual_sum}. In agreement with corollary \ref{corr_affine_pc_count}, 7 of the vertices lie on the upper convex hull. Explicitly, the functions \(f_1 - g_1\) and \(f_2 - g_1\) are the only ones which do not have a nonempty affine region, and the points \(\cR^{-1} (f_1) + \cR^{-1} (g_1)\) and \(\cR^{-1} (f_2) + \cR^{-1} (g_1)\) are the only ones which lie fully below the upper convex hull.

\begin{figure}
    \centering
    \subfigure[Plot of \(F\).]{
        \begin{tikzpicture}[scale=1.5]
        % axes
        \draw[->] (-3.5, 0) -- (4.5, 0);
        \draw[->] (0, -1.5) -- (0, 1.5);
        % ticks
        \draw (-3, -.1) -- (-3, .1) (-2, -.1) -- (-2, .1) (-1, -.1) -- (-1, .1) (1, -.1) -- (1, .1) (2, -.1) -- (2, .1) (3, -.1) -- (3, .1) (4, -.1) -- (4, .1);
        \draw (-.1, -1) -- (.1, -1) (-.1, 1) -- (.1, 1);
        % function
        \draw (-3.5, .25) -- (-2, -.5) -- (-1/3, 1/3) -- (0, 1) -- (1, 1) -- (3, -1);
    \end{tikzpicture}
    \label{subfig:1d_example_plot}
    }
    \subfigure[Points of \(P\) (marked \(\circ\)) and \(N\) (marked \(\times\)) in the dual space. Dashed lines are the upper convex hull \(\U (P \cup N)\). Double lines join a point of \(P\) with a point of \(N\).]{
        \begin{tikzpicture}
        % axes
        \draw[->] (-1.5, 0) -- (3.5, 0);
        \draw[->] (0, -2.5) -- (0, 1.5);
        % ticks
        \draw (-1, -.1) -- (-1, .1) (1, -.1) -- (1, .1) (2, -.1) -- (2, .1) (3, -.1) -- (3, .1);
        \draw (-.1, -2) -- (.1, -2) (-.1, -1) -- (.1, -1) (-.1, 1) -- (.1, 1);
        % points
        \node at (-.5, -1.5) {$\circ$}; \node at (-.8, -1.2) {\small $f_1$};
        \node at (.5, .5) {$\circ$}; \node at (.4, .8) {\small $f_2$};
        \node at (2, 1) {$\circ$}; \node at (2.4, 1) {\small $f_3$};
        \node at (0, 0) {$\times$}; \node at (.3, -.3) {\small $g_1$};
        \node at (2, 0) {$\times$}; \node at (1.8, -.3) {\small $g_2$};
        \node at (3, -1) {$\times$}; \node at (2.7, -1.2) {\small $g_3$};
        % hull
        \draw[dashed] (-.5, -1.5) (.5, .5) -- (2, 1);
        \draw[dashed, double] (-.5, -1.5) -- (0, 0) -- (.5, .5) (2, 1) -- (3, -1);
    \end{tikzpicture}
    \label{subfig:1d_example_dual_points}
    }
    \hspace{8mm}
    \subfigure[Correspondence between 1-cells of \(\U (P \cup N)\) and zeros of \(F\). We represent each 1-cell as the graph of a linear function over an interval.]{
        \begin{tabular}{c|c|c}
        vertices & 1-cell & zero of \(F\) \\
        \hline
        \(f_1, g_1\) & \(3x\) over \([-\tfrac{1}{2}, 0]\) & -3 \\
        \(f_2, g_1\) & \(x\) over \([0, \tfrac{1}{2}]\) & -1 \\
        \(f_3, g_3\) & \(-2x+5\) over \([2, 3]\) & 2
    \end{tabular}
    \label{subfig:1d_example_cells_to_zeros}
    }
    \subfigure[Minkowski sum \(P \oplus N\) in the dual space. Dashed lines represent the upper convex hull.]{
        \begin{tikzpicture}[scale=.8]
        % axes
        \draw[->] (-1.5, 0) -- (5.5, 0);
        \draw[->] (0, -3) -- (0, 1.5);
        % ticks
        \draw (-1, -.1) -- (-1, .1) (1, -.1) -- (1, .1) (2, -.1) -- (2, .1) (3, -.1) -- (3, .1) (4, -.1) -- (4, .1) (5, -.1) -- (5, .1);
        \draw (-.1, -3) -- (.1, -3) (-.1, -2) -- (.1, -2) (-.1, -1) -- (.1, -1) (-.1, 1) -- (.1, 1);
        % points
        \filldraw (-.5, -1.5) circle (1.5pt); \node at (-1, -1) {\small $f_1 - g_1$};
        \filldraw (1.5, -1.5) circle (1.5pt);
        \filldraw (2.5, -2.5) circle (1.5pt);
        \filldraw (.5, .5) circle (1.5pt); \node at (.2, .8) {\small $f_2 - g_1$};
        \filldraw (2.5, .5) circle (1.5pt);
        \filldraw (3.5, -.5) circle (1.5pt);
        \filldraw (2, 1) circle (1.5pt); \node at (2, 1.3) {\small $f_3 - g_1$};
        \filldraw (4, 1) circle (1.5pt); \node at (4.4, 1.3) {\small $f_3 - g_2$};
        \filldraw (5, 0) circle (1.5pt); \node at (5, -.3) {\small $f_3 - g_3$};
        % hull
        \draw[dashed] (-.5, -1.5) -- (.5, .5) -- (2, 1) -- (4, 1) -- (5, 0);
    \end{tikzpicture}
    \label{subfig:1d_example_dual_sum}
    }
    \hspace{5mm}
    \subfigure[Tangents to \(\U (P \oplus N)\) and the corresponding affine regions. We represent each tangent line as the graph of a linear function. They are parameterised by their slope \(t\).]{
        \begin{tabular}{c|c|c}
            vertex & tangents & affine region \\
            \hline
            \( f_1 - g_1 \) & \(\{t x + \tfrac{t-3}{2}\}_{t \in [2, \infty)}\) & \((-\infty, -2]\) \\
            \( f_2 - g_1 \) & \(\{t x + \tfrac{1 - t}{2}\}_{t \in [\tfrac{1}{3}, 2]}\) & \([-2, -\tfrac{1}{3}]\) \\
            \( f_3 - g_1\) & \(\{t x + 1 - 2 t\}_{t \in [0, \tfrac{1}{3}]}\)& \([-\tfrac{1}{3}, 0]\) \\
            \(f_3 - g_2\) & \(\{t x + 1 - 4 t\}_{t \in [-1, 0]}\) & \([0, 1]\) \\
            \(f_3 - g_3\) & \(\{t x - 5 t\}_{t \in (-\infty, -1]}\) & \([1, \infty)\)
        \end{tabular}
        \label{subfig:1d_example_tangents_to_points}
    }
    \caption{Illustration of the results on a one-dimensional example.}
\end{figure}

\begin{figure}[t]
    \centering
    \subfigure[The function \(F\) on the \(xy\) plane. Dotted lines mark the boundaries of affine regions. The annotation \(f_i - g_j\) means that \(F = f_i - g_j\) on the corresponding affine region. Solid lines indicate the zero set, and the shaded region contains arguments for which \(F\) is positive.]{
    \begin{tikzpicture}[scale=.7]
        % axes
        \draw[->] (-10.5, 0) -- (6.5, 0);
        \draw[->] (0, -5.5) -- (0, 6.5);
        % ticks
        \draw (-10, -.1) -- (-10, .1) (-9, -.1) -- (-9, .1) (-8, -.1) -- (-8, .1) (-7, -.1) -- (-7, .1) (-6, -.1) -- (-6, .1) (-5, -.1) -- (-5, .1) (-4, -.1) -- (-4, .1) (-3, -.1) -- (-3, .1) (-2, -.1) -- (-2, .1) (-1, -.1) -- (-1, .1) (1, -.1) -- (1, .1) (2, -.1) -- (2, .1) (3, -.1) -- (3, .1) (4, -.1) -- (4, .1) (5, -.1) -- (5, .1) (6, -.1) -- (6, .1);
        \draw (-.1, -5) -- (.1, -5) (-.1, -4) -- (.1, -4) (-.1, -3) -- (.1, -3) (-.1, -2) -- (.1, -2) (-.1, -1) -- (.1, -1) (-.1, 1) -- (.1, 1) (-.1, 3) -- (.1, 3) (-.1, 4) -- (.1, 4) (-.1, 5) -- (.1, 5) (-.1, 6) -- (.1, 6);
        % affine boundaries
        \draw[dotted, thick] (-2, -2) -- (-9, -5.5) (-2, -2) -- (-3.75, -5.5) (-2, -2) -- (6.5, 6.5);
        \draw[dotted, thick] (3, -4) -- (3, 6.5) (3, -4) -- (-10.5, 2.75) (3, -4) -- (4, -5.5);
        % zero set
        \draw (-10.5, 2.5) -- (-9, 2) -- (2, 2) -- (-.75, -2.125) -- (-.75, -5.5);
        \draw (5.25, 6.5) -- (4, 4) -- (6.5, 5.25);
        % annotations
        \node at (-2, 4) {$f_1 - g_3$};
        \node at (-7, -1) {$f_3 - g_3$};
        \node at (-4, -4) {$f_3 - g_1$};
        \node at (1, -5) {$f_3 - g_2$};
        \node at (1.5, -2) {$f_1 - g_2$};
        \node at (5, 1) {$f_2 - g_2$};
        \node at (4, 6) {$f_2 - g_3$};
        % positive region
        \draw[draw=none, fill=gray, fill opacity=.2] (-10.5, -5.5) -- (-10.5, 2.5) -- (-9, 2) -- (2, 2) -- (-.75, -2.125) -- (-.75, -5.5);
        \draw[draw=none, fill=gray, fill opacity=.2] (6.5, 6.5) -- (5.25, 6.5) -- (4, 4) -- (6.5, 5.25) -- (6.5, 6.5);
    \end{tikzpicture}
    \label{subfig:2d_example_plot}
    }
    \subfigure[Positions of \(P \cup N\) in the dual space. The first number indicates the \(z\)-coordinate. Dashed lines are projections of edges (1-cells) of the upper convex hull. The point \(\cR^{-1} (g_1)\) is fully below the hull.]{
    \begin{tikzpicture}
        % axes
        \draw[->] (-2.5, 0) -- (2.5, 0);
        \draw[->] (0, -1.5) -- (0, 2.5);
        % ticks
        \draw (-2, -.1) -- (-2, .1) (-1, -.1) -- (-1, .1) (1, -.1) -- (1, .1) (2, -.1) -- (2, .1);
        \draw (-.1, -1) -- (.1, -1) (-.1, 1) -- (.1, 1) (-.1, 2) -- (.1, 2);
        % points
        \node at (-1, 1) {$\circ$}; \node at (-.8, .55) {\small $4, f_1$};
        \node at (1, 1) {$\circ$}; \node at (1.3, 1.3) {\small $-2, f_2$};
        \node at (-2, -1) {$\circ$}; \node at (-2, -1.3) {\small $-1, f_3$};
        \node at (0, 0) {$\times$}; \node at (-.3, -.2) {\small $0, g_1$};
        \node at (2, -1) {$\times$}; \node at (2, -1.3) {\small $2, g_2$};
        \node at (-1, 2) {$\times$}; \node at (-1.5, 2) {\small $2, g_3$};
        % edges
        \draw[dashed, double] (-1, 2) -- (1, 1) -- (2, -1) -- (-2, -1) -- (-1, 2) -- (-1, 1) -- (2, -1);
        \draw[dashed] (-2, -1) -- (-1, 1) (-1, 2) -- (2, -1);
    \end{tikzpicture}
    \label{subfig:2d_example_dual_points}
    }
    \hspace{5mm}
    \subfigure[Projection of the faces of \(\U (P \oplus N)\) on the \(xy\)-plane. The first number indicates the \(z\)-coordinate; \(f_i - g_j\) is a shorthand for the dual point \(\cR^{-1} (f_i) + \cR^{-1} (g_j)\). Two points lie fully below the hull.]{
    \begin{tikzpicture}[scale=.8]
        % axes
        \draw[->] (-3.5, 0) -- (1, 0) (3, 0) -- (3.5, 0);
        \draw[->] (0, -2.5) -- (0, 3.5);
        % ticks
        \draw (-3, -.1) -- (-3, .1) (-2, -.1) -- (-2, .1) (-1, -.1) -- (-1, .1) (1, -.1) -- (1, .1) (2, -.1) -- (2, .1) (3, -.1) -- (3, .1);
        \draw (-.1, -2) -- (.1, -2) (-.1, -1) -- (.1, -1) (-.1, 1) -- (.1, 1) (-.1, 2) -- (.1, 2) (-.1, 3) -- (.1, 3);
        % points
        \filldraw (-1, 1) circle (1.5pt); \node at (-.8, .6) {\small $4, f_1 - g_1$};
        \filldraw (1, 0) circle (1.5pt); \node at (1.2, -.4) {\small $6, f_1 - g_2$};
        \filldraw (-2, 3) circle (1.5pt); \node at (-2, 3.3) {\small $6, f_1 - g_3$};
        \filldraw (1, 1) circle (1.5pt); \node at (1.2, 1.5) {\small $-2, f_2 - g_1$};
        \filldraw (3, 0) circle (1.5pt); \node at (3.1, -.7) {\small $0, f_2 - g_2$};
        \filldraw (0, 3) circle (1.5pt); \node at (1, 3) {\small $0, f_2 - g_3$};
        \filldraw (-2, -1) circle (1.5pt); \node at (-2.7, -1.3) {\small $-1, f_3 - g_1$};
        \filldraw (0, -2) circle (1.5pt); \node at (.9, -2.2) {\small $1, f_3 - g_2$};
        \filldraw (-3, 1) circle (1.5pt); \node at (-3.4, 1.6) {\small $1, f_3 - g_3$};
        % edges
        \draw[dashed] (-3, 1) -- (-2, 3) -- (0, 3) -- (3, 0) -- (0, -2) -- (-2, -1) -- (-3, 1) -- (0, -2) (-2, 3) -- (1, 0) -- (0, -2) (1, 0) -- (3, 0);
    \end{tikzpicture}
    \label{subfig:2d_example_dual_sum}
    }
    \caption{Illustration of the results on a two-dimensional example.}
\end{figure}

\end{document}