% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
\usepackage{enumitem}
\usepackage{float}
\usepackage{subcaption}
\usepackage{bm}
 \usepackage{relsize}
\usepackage{graphicx}
\usepackage{tikz}
\usepackage{tikz-3dplot}
\usepackage{makecell}
\usepackage{multirow}
\usepackage{mathtools}
\tdplotsetmaincoords{74}{115}
\usepackage{amssymb}

\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode}
\usepackage{ragged2e}

\usetikzlibrary{decorations.markings}
\usetikzlibrary{positioning}
\usetikzlibrary{calc}
\usepackage{hyperref}
\hypersetup{
    colorlinks=true,
    linkcolor=blue,
    filecolor=magenta,      
    }
\usetikzlibrary{arrows,automata}
\usetikzlibrary{positioning}
\tikzset{
    state/.style={
           rectangle,
           rounded corners,
           draw=black, very thick,
           text centered,
           },
    fontscale/.style = {font=\relsize{#1}}
}

%% The defined command
\newcommand{\authnote}[2]{{\textcolor{red}{#1:} \textcolor{blue}{#2}}}
\newcommand{\zyy}[1]{\authnote{Yuyi}{#1}}
\newcommand{\arena}{ARENA}
\newcommand{\crown}{$\alpha, \beta$-CROWN}
\newcommand{\wrelu}{WraLU}
\newcommand{\gurobi}{GUROBI}
\newcommand{\symlow}[1]{l^s_{#1}}
\newcommand{\symup}[1]{u^s_{#1}}
\newcommand{\grena}{GRENA}
\newcommand{\st}{\text{s.t. }}
\newcommand{\for}{\text{for }}
\newcommand{\tand}{\text{ and }}
\newcommand{\khoo}[1]{\textcolor{red}{#1}}
\renewcommand{\footnotesize}{\fontsize{7pt}{11pt}\selectfont}
%%%%
\begin{document}
%
\title{GRENA: GPU-aided Abstract Refinement for Neural Network Verification}
%
%
\author{Yuyi Zhong\inst{1}\textsuperscript{\dag} \and
Shaun Tan Zong Zhi\inst{1}\and
Hanping Xu \inst{1} \and
Siau-Cheng Khoo \inst{2}} 
%
\authorrunning{Yuyi Zhong et al.}
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
%
\institute{National University of Singapore\\
\email{\{yuyizhong,shauntanzongzhi,xuhanping\}@u.nus.edu}
 \and
National University of Singapore\\
\email{khoosc@nus.edu.sg}
}
%
\maketitle
\begingroup\renewcommand\thefootnote{\dag}
\footnotetext{Corresponding author}
\endgroup
%
\begin{abstract}
Since neural network verification problems can be formulated as optimization problems, linear programming (LP) solvers have been deployed as off-the-shelf tools in such processes.
% 
However, existing LP solvers running on CPU scale poorly on large networks.
% 
To expedite the process, we propose an LP-solving {\em theorem} tailored to neural network verification.
% 
In practice, we transform the constrained solving problem into an unconstrained problem that can be executed on GPUs, significantly speeding up the solving process.
% 
We explicitly include constraints on layers that take more than one predecessor instead of handling multiple predecessors by inefficient concatenation.
% 
Our theorem applies to widely used networks, such as fully connected, convolutional, and residual networks.
% 
From our evaluation, our GPU-aided solver achieves comparable precision to the state-of-the-art (SOTA) solver \gurobi{} with significant speed improvements and helps acquire competitive verification precision compared to advanced verification methods.

\keywords{Abstract Refinement \and Linear Programming \and Neural Network Verification.}
\end{abstract}
%
%
\section{Introduction}
Researchers have investigated the verification of neural networks due to their wide application \cite{DBLP:journals/pacmpl/UrbanCWZ20,DBLP:conf/icse/PaulsenWW20,DBLP:journals/corr/abs-2103-06624}.
% 
Throughout the evolution of verification techniques, abstract interpretation-based techniques \cite{DBLP:journals/pacmpl/SinghGPV19,DBLP:conf/sp/GehrMDTCV18,tjandraatmadja2020convex,DBLP:journals/pacmpl/UgareSM22,DBLP:journals/pacmpl/UgareBM023,DBLP:conf/uss/WangPWYJ18,DBLP:conf/fmcad/ZelaznyWBK22,DBLP:conf/mlsys/MullerS0PV21} continue to play an important role.
% 
However, due to the nature of over-approximation, the methods could suffer from severe precision loss for deeper networks.
% 
Theoretically, such abstraction can be refined with the help of (mixed integer) linear programming (MILP or LP)~\cite{DBLP:conf/iclr/SinghGPV19,DBLP:conf/tacas/YangLLHWSXZ21,DBLP:conf/vmcai/ZhongTK23}, where the \gurobi{} solver~\cite{GurobiSystem} is commonly used despite the scalability concern that it executes on the CPU.
% Thus, there is a promising direction for improving abstraction (aka. abstract refinement) with the help of (mixed integer) linear programming (MILP or LP) \cite{DBLP:conf/iclr/SinghGPV19,DBLP:conf/tacas/YangLLHWSXZ21,DBLP:conf/vmcai/ZhongTK23} where \gurobi{} \cite{GurobiSystem} solver is commonly used despite the scalability concern that it executes on the CPU.

Therefore, we propose a tailored theorem to accelerate LP solving for abstract-refinement-based methods.
% 
Notably, our theorem could handle three types of constraints: output constraints, intermediate neuron constraints and constraints of layers that take more than one predecessor, which enhances the scientific rigor of the verification of residual networks. 
% 
Our paper offers a methodical transformation from the stage of verification specification to the stage of effective implementation as an analyzer named \grena{} (\underline{G}PU-aided abstract \underline{RE}finement for \underline{N}eural network verific\underline{A}tion), and we assess it against the state-of-the-art tools to empirically support its strong solving and verification capacities.
% 
Our dockerized system, data, usage documentation and experiment scripts are available at \textcolor{blue}{\url{https://github.com/Grena-verifier/Grena-verifier}}.
% 
We summarize our contributions below:
% 
\begin{itemize}[left=1em]
  \item We propose a novel, formal and rigorous theorem to solve constrained optimization problems that include output constraints, multi-ReLU constraints, and complex constraints of residual network layers. 
  % 
  Specifically, to the best of our knowledge, this is the first work that uses the Lagrangian dual in a spurious-adversarial-label-guided refinement process to enhance the scientific rigor of the verification of residual networks.
  % \khoo{Specifically, to the best of our knowledge, this is the first work that uses Lagrangian dual on spurious-adversarial-label guided refinement process to enhance the scientific rigor of the verification of residual networks.}
  % 
  \item We utilize the multi-ReLU abstraction in \wrelu{} \cite{DBLP:journals/pacmpl/MaLB24} to further tighten our constraint set for precision improvement.

  \item We provide strong and effective implementations and demonstrate the verification efficiency of our system through empirical experiments, and deliver a \textcolor{blue}{\href{https://drive.google.com/file/d/17v1WnabNrzC-ZwJzJ4dLTQmm9JvDYfj5/view?usp=sharing}{video showcase}}\footnote{\url{https://drive.google.com/file/d/17v1WnabNrzC-ZwJzJ4dLTQmm9JvDYfj5/view?usp=sharing}} of our analyzer.
\end{itemize}

% 
% 
\section{Overview}
\label{sec:overview}
To provide an intuitive understanding, we use an example in \autoref{fig:SimplyResiBlock} to show how the approach works given the network and the input space $I =
[-1,1]\times [-1,1]$ of 2 input neurons $x_1$, $x_2$.
% 
This network has 2 output neurons $y_1, y_2$, corresponding to two
labels $L_1, L_2$ that an input can be classified as, and we aim to verify that $y_1 - y_2 >0$ for all inputs in $I$.

\begin{figure}[!ht]
\centering
\scriptsize
\begin{subfigure}[b]{0.4\textwidth}
\begin{tikzpicture}[
    input/.style={rectangle, draw = red, thick,
      minimum width=1cm, minimum height=0.5cm},
    output/.style={rectangle, draw = blue, thick,
      minimum width=1cm, minimum height=0.5cm},
  linear/.style={rectangle, draw = black, thick,
      minimum width=1cm, minimum height=0.5cm}
    ]
  % neuron nodes
  \node[input, red] (input){input};
  \node[linear, black, below left = 6mm and 3 mm of input](linear1){linear1};
  \node[linear, black, right = 16mm of linear1](linear2){linear2};
  \node[linear, black, below = 18mm of input](add){add};
  \node[output, blue, below = 6mm of add](output){relu};
  % connections
  \draw [->,black, thin](input) -- (linear1);
  \draw [->,black, thin](input) -- (linear2);
  \draw [->,black, thin](linear1) -- (add);
  \draw [->,black, thin](linear2) -- (add);
  \draw [->, black, thin] (add) -- (output);
 \end{tikzpicture}
  \caption{The network with an add layer}
 \label{fig:addblock}
\end{subfigure}
% 
\begin{subfigure}[b]{0.4\textwidth}
   \begin{tikzpicture}[
    red_node/.style={circle, draw=red, fill=red!5, thin,
      minimum size = 6mm, inner sep=1pt},
    blue_node/.style={circle, draw = blue, fill=cyan!5, thin,
      minimum size = 6mm, inner sep=1pt},
    black_node/.style={circle, draw = black, fill=black!5, thin,
      minimum size = 6mm, inner sep=1pt},
    red_rectangle/.style={rectangle, draw = red, thin, dashed}
    ]
  % neuron nodes
  \node[red, red_node] (x1){$x_1$};
  \node[red, red_node, right = 5mm of x1](x2){$x_2$};
  \node[black, black_node, below = 5mm of x1] (x4){$x_4$};
 \node[black, black_node, left = 5mm of x4] (x3){$x_3$};
 \node[black, black_node, right = 5mm of x4] (x5){$x_5$};
  \node[black, black_node, right = 5mm of x5] (x6){$x_6$};
   \node[black, black_node, below = 5mm of x4] (x7){$x_7$};
  \node[black, black_node, below = 5mm of x5] (x8){$x_8$};
  \node[blue, blue_node, below = 5mm of x7] (y1){$y_1$};
  \node[blue, blue_node, below = 5mm of x8] (y2){$y_2$};
  % definitions
  \node[black, below left = 0.5mm and -6mm of x3] (x3def) {$x_1+2.8$};
  \node[black, below = 0mm of x4] (x4def) {$x_1$};
  \node[black, below = 0mm of x5] (x5def) {$x_2$};
  \node[black, below = -0.5mm of x6] (x6def) {$-x_2$};
  \node[black, left = 0mm of x7] (x7def) {$x_3+x_5$};
  \node[black, right = 0mm of x8] (x8def) {$x_4+x_6$};
  \node[blue, above left = -1mm and -1mm of y1] (y1def) {$\mathrm{max}(0, x_7)$};
  \node[blue, above right = -1mm and -1mm of y2] (y2def) {$\mathrm{max}(0, x_8)$};
  % edges
  \draw [->,black,thin](x1) -- (x3);
  \draw [->,black,thin](x1) -- (x4);
  \draw [->,black,thin](x1) -- (x5);
  \draw [->,black,thin](x1) -- (x6);
  \draw [->,black,thin](x2) -- (x3);
  \draw [->,black,thin](x2) -- (x4);
  \draw [->,black,thin](x2) -- (x5);
  \draw [->,black,thin](x2) -- (x6);
  \draw [->,black,thin](x3) -- (x7);
  \draw [->,black,thin](x5) -- (x7);
  \draw [->,black,thin](x4) -- (x8);
  \draw [->,black,thin](x6) -- (x8);
  \draw [->,black,thin](x7) -- (y1);
  \draw [->,black,thin](x8) -- (y2);
\end{tikzpicture}
\caption{Neuron connections}
\label{fig:neuronNet}
\end{subfigure}
\caption{The example network to be verified with $y_1-y_2 > 0$ with input space $I =
[-1,1]\times [-1,1]$}
\label{fig:SimplyResiBlock}
\end{figure}

We first apply the abstract interpretation technique, as deployed by DeepPoly \cite{DBLP:journals/pacmpl/SinghGPV19}, to compute the reachable statuses for each neuron at \autoref{fig:neuronNet} and represent them by four elements $(l_i, u_i, \symlow{i}, \symup{i})$.
%
The {\em concrete} lower and upper bounds $l_i, u_i$ form an interval $[l_i, u_i]$ that over-approximates all the values that neuron $x_i$ could take.
%
The {\em symbolic} constraints $\symlow{i}, \symup{i}$ are linear expressions of $x_i$ defined over preceding neurons while satisfying $\symlow{i} \leq x_i \leq \symup{i}$.
% 
The abstract values are displayed near the corresponding nodes at \autoref{fig:AINet}.

\begin{figure}[!ht]
\scriptsize
\centering
   \begin{tikzpicture}[
    red_node/.style={circle, draw=red, fill=red!5, thin,
      minimum size = 6mm, inner sep=1pt},
    blue_node/.style={circle, draw = blue, fill=cyan!5, thin,
      minimum size = 6mm, inner sep=1pt},
    black_node/.style={circle, draw = black, fill=black!5, thin,
      minimum size = 6mm, inner sep=1pt},
    red_rectangle/.style={rectangle, draw = red, thin, dashed}
    ]
  % neuron nodes
  \node[red, red_node] (x1){$x_1$};
  \node[black, above = 2mm of x1] (input1){$[-1,1]$};
  \node[red, red_node, right = 8mm of x1](x2){$x_2$};
  \node[black, above = 2mm of x2] (input2){$[-1,1]$};
  \node[black, black_node, below = 5mm of x1] (x4){$x_4$};
 \node[black, black_node, left = 8mm of x4] (x3){$x_3$};
 \node[black, black_node, right = 8mm of x4] (x5){$x_5$};
  \node[black, black_node, right = 8mm of x5] (x6){$x_6$};
   \node[black, black_node, below = 10mm of x3] (x7){$x_7$};
  \node[black, black_node, below = 10mm of x6] (x8){$x_8$};
  \node[blue, blue_node, below = 8mm of x7] (y1){$y_1$};
  \node[blue, blue_node, below = 8mm of x8] (y2){$y_2$};
  % x3 definitions
  \node[black, above left = 0.5mm and -6mm of x3] () {$x_3\in [1.8, 3.8]$};
 \node[black, above left = 3.5mm and -6mm of x3] () {$x_3 \leq x_1+2.8$};
  \node[black, above left = 6.5 mm and -6mm of x3] () {$x_3 \geq x_1+2.8$};
  % x6 def
    \node[black, above right = 0.5mm and -6mm of x6] () {$x_6\in [-1, 1]$};
 \node[black, above right = 3.5mm and -6mm of x6] () {$x_6 \leq -x_2$};
  \node[black, above right = 6.5 mm and -6mm of x6] () {$x_6 \geq -x_2$};
  % x4 def
  \node[black, below left = 0mm and -3.5mm of x4] () {$x_4 \in [-1,1]$};
    \node[black, below left = 3mm and 1mm of x4] () {$x_4 \leq x_1$};
    \node[black, below left = 5.5mm and 1mm of x4] () {$x_4 \geq x_1$};
  % x5def
    \node[black, below right = 0mm and -3.5mm of x5] () {$x_5 \in [-1,1]$};
    \node[black, below right = 3mm and 1mm of x5] () {$x_5 \leq x_2$};
    \node[black, below right = 5.5mm and 1mm of x5] () {$x_5 \geq x_2$};
  % 7def {$x_3+x_5$};
  \node[black, below left = -0.5mm and 1mm of x7] () {$x_7\in [0.8, 4.8]$};
  \node[black, below left = -3.5mm and 1mm of x7] () {$x_7\leq x_3 + x_5$};
 \node[black, below left = -6.5mm and 1mm of x7] () {$x_7\geq x_3 + x_5$};
 % x8def {$x_4+x_6$};
 \node[black, below right = -0.5mm and 1mm of x8] () {$x_8\in [-2, 2]$};
  \node[black, below right = -3.5mm and 1mm of x8] () {$x_8\leq x_4+x_6$};
 \node[black, below right = -6.5mm and 1mm of x8] () {$x_8\geq x_4+x_6$};
 % y1def
  \node[black, right = -0.5mm of y1] () {$y_1\in [0.8, 4.8]$};
  \node[black, above right = -1.5mm and 0.5mm of y1] () {$y_1\leq x_7$};
 \node[black, above right = 1.5mm and 0.5mm of y1] () {$y_1\geq x_7$};
  % y2def
  \node[black, left = -0.5mm of y2] () {$y_2\in [0, 2]$};
  \node[black, above left = -1.5mm and 0.5mm of y2] () {$y_2\leq 0.5 x_8 + 1$};
 \node[black, above left = 1.5mm and 0.5mm of y2] () {$y_2\geq 0$};
  % edges
 \draw [->,black,thin](input1) -- (x1);
  \draw [->,black,thin](input2) -- (x2);
  \draw [->,black,thin](x1) -- (x3);
  \draw [->,black,thin](x1) -- (x4);
  \draw [->,black,thin](x1) -- (x5);
  \draw [->,black,thin](x1) -- (x6);
  \draw [->,black,thin](x2) -- (x3);
  \draw [->,black,thin](x2) -- (x4);
  \draw [->,black,thin](x2) -- (x5);
  \draw [->,black,thin](x2) -- (x6);
  \draw [->,black,thin](x3) -- (x7);
  \draw [->,black,thin](x5) -- (x7);
  \draw [->,black,thin](x4) -- (x8);
  \draw [->,black,thin](x6) -- (x8);
  \draw [->,black,thin](x7) -- (y1);
  \draw [->,black,thin](x8) -- (y2);
\end{tikzpicture}
\vspace{-0.5em}
\caption{The network to perform abstract interpretation}
\label{fig:AINet}
\end{figure}
% 
Based on the abstraction, the computed value for the lower bound of $y_1 - y_2$ is $-0.2$ ($y_1 - y_2$ will be treated as an auxiliary neuron in order to compute its lower bound, the details can be found in \cite{DBLP:journals/pacmpl/SinghGPV19}), failing to assert that $y_1 - y_2 > 0$.
% 
However, this failure is due to the over-approximation error, and there is no such instance that leads to $y_1 - y_2 \leq 0$.
% 
To prove $y_1 - y_2 \leq 0$ to be infeasible, we will construct a constraint set that encodes the existence
of spurious counterexamples together with the network constraints (conjunction of all linear inequalities including the {\em concrete} bounds and {\em symbolic} constraints of all neurons).
% 
We send this constraint set to our tailored LP solver (detailed in \autoref{sec:theorem}) to resolve the concrete bounds of the input neurons ($x_1, x_2$) and of those linear neurons ($x_8$) that are followed by ReLU and take both negative and positive values.
% 
The returned bounds will be tighter, as shown in \autoref{fig:afterLP}, and diminish the inconclusiveness produced by the previous abstract interpretation.

\begin{figure}[!ht]
\scriptsize
\centering
   \begin{tikzpicture}[
    red_node/.style={circle, draw=red, fill=red!5, thin,
      minimum size = 6mm, inner sep=1pt},
    blue_node/.style={circle, draw = blue, fill=cyan!5, thin,
      minimum size = 6mm, inner sep=1pt},
    black_node/.style={circle, draw = black, fill=black!5, thin,
      minimum size = 6mm, inner sep=1pt},
    red_rectangle/.style={rectangle, draw = red, thin, dashed}
    ]
  % neuron nodes
  \node[red, red_node] (x1){$x_1$};
  \node[black, above = 2mm of x1] (input1){$[-1,$\textcolor{red}{$-0.867^*$}$]$};
  \node[red, red_node, right = 9mm of x1](x2){$x_2$};
  \node[black, above = 2mm of x2] (input2){$[-1,$\textcolor{red}{$-0.6^*$}$]$};
  \node[black, black_node, below = 5mm of x1] (x4){$x_4$};
 \node[black, black_node, left = 8mm of x4] (x3){$x_3$};
 \node[black, black_node, right = 8mm of x4] (x5){$x_5$};
  \node[black, black_node, right = 8mm of x5] (x6){$x_6$};
   \node[black, black_node, below = 10mm of x3] (x7){$x_7$};
  \node[black, black_node, below = 10mm of x6] (x8){$x_8$};
  \node[blue, blue_node, below = 8mm of x7] (y1){$y_1$};
  \node[blue, blue_node, below = 8mm of x8] (y2){$y_2$};
  % x3 definitions
  \node[black, above left = 0.5mm and -6mm of x3] () {$x_3\in [1.8, 3.8]$};
 \node[black, above left = 3.5mm and -6mm of x3] () {$x_3 \leq x_1+2.8$};
  \node[black, above left = 6.5 mm and -6mm of x3] () {$x_3 \geq x_1+2.8$};
  % x6 def
    \node[black, above right = 0.5mm and -6mm of x6] () {$x_6\in [-1, 1]$};
 \node[black, above right = 3.5mm and -6mm of x6] () {$x_6 \leq -x_2$};
  \node[black, above right = 6.5 mm and -6mm of x6] () {$x_6 \geq -x_2$};
  % x4 def
  \node[black, below left = 0mm and -3.5mm of x4] () {$x_4 \in [-1,1]$};
    \node[black, below left = 3mm and 1mm of x4] () {$x_4 \leq x_1$};
    \node[black, below left = 5.5mm and 1mm of x4] () {$x_4 \geq x_1$};
  % x5def
    \node[black, below right = 0mm and -3.5mm of x5] () {$x_5 \in [-1,1]$};
    \node[black, below right = 3mm and 1mm of x5] () {$x_5 \leq x_2$};
    \node[black, below right = 5.5mm and 1mm of x5] () {$x_5 \geq x_2$};
  % 7def {$x_3+x_5$};
  \node[black, below left = -0.5mm and 1mm of x7] () {$x_7\in [0.8, 4.8]$};
  \node[black, below left = -3.5mm and 1mm of x7] () {$x_7\leq x_3 + x_5$};
 \node[black, below left = -6.5mm and 1mm of x7] () {$x_7\geq x_3 + x_5$};
 % x8def {$x_4+x_6$};
 \node[black, below right = -1mm and 0mm of x8] () {$x_8\in [$\textcolor{red}{$-0.4^*, 0.133^*$}$]$};
  \node[black, below right = -3.5mm and 0.5mm of x8] () {$x_8\leq x_4+x_6$};
 \node[black, below right = -6.5mm and 0.5mm of x8] () {$x_8\geq x_4+x_6$};
 % y1def
  \node[black, right = -0.5mm of y1] () {$y_1\in [0.8, 4.8]$};
  \node[black, above right = -1.5mm and 0.5mm of y1] () {$y_1\leq x_7$};
 \node[black, above right = 1.5mm and 0.5mm of y1] () {$y_1\geq x_7$};
  % y2def
  \node[black, left = -0.5mm of y2] () {$y_2\in [0, 2]$};
  \node[black, above left = -1.5mm and 0.5mm of y2] () {$y_2\leq 0.5 x_8 + 1$};
 \node[black, above left = 1.5mm and 0.5mm of y2] () {$y_2\geq 0$};
  % edges
 \draw [->,black,thin](input1) -- (x1);
  \draw [->,black,thin](input2) -- (x2);
  \draw [->,black,thin](x1) -- (x3);
  \draw [->,black,thin](x1) -- (x4);
  \draw [->,black,thin](x1) -- (x5);
  \draw [->,black,thin](x1) -- (x6);
  \draw [->,black,thin](x2) -- (x3);
  \draw [->,black,thin](x2) -- (x4);
  \draw [->,black,thin](x2) -- (x5);
  \draw [->,black,thin](x2) -- (x6);
  \draw [->,black,thin](x3) -- (x7);
  \draw [->,black,thin](x5) -- (x7);
  \draw [->,black,thin](x4) -- (x8);
  \draw [->,black,thin](x6) -- (x8);
  \draw [->,black,thin](x7) -- (y1);
  \draw [->,black,thin](x8) -- (y2);
\end{tikzpicture}
\vspace{-0.5em}
\caption{The result of resolving bounds (in {\color{red} red} marked by {\color{red}$^{*}$})}
\label{fig:afterLP}
\end{figure}

Based on the updated bounds, we rerun abstract interpretation and update the abstract values of all neurons accordingly, as shown in \autoref{fig:secondAI}.
% 
Based on the new abstraction, the lower bound of $y_1-y_2$ is 0.7, making $y_1 - y_2 \leq 0$ actually infeasible, which means that $y_1$ dominates over $y_2$ and we could conclude that $y_1-y_2 > 0$.

\begin{figure}[!ht]
\scriptsize
\centering
   \begin{tikzpicture}[
    red_node/.style={circle, draw=red, fill=red!5, thin,
      minimum size = 6mm, inner sep=1pt},
    blue_node/.style={circle, draw = blue, fill=cyan!5, thin,
      minimum size = 6mm, inner sep=1pt},
    black_node/.style={circle, draw = black, fill=black!5, thin,
      minimum size = 6mm, inner sep=1pt},
    red_rectangle/.style={rectangle, draw = red, thin, dashed}
    ]
  % neuron nodes
  \node[red, red_node] (x1){$x_1$};
  \node[black, above = 2mm of x1] (input1){$[-1,$\textcolor{red}{$-0.867^*$}$]$};
  \node[red, red_node, right = 9mm of x1](x2){$x_2$};
  \node[black, above = 2mm of x2] (input2){$[-1,$\textcolor{red}{$-0.6^*$}$]$};
  \node[black, black_node, below = 5mm of x1] (x4){$x_4$};
 \node[black, black_node, left = 9mm of x4] (x3){$x_3$};
 \node[black, black_node, right = 8mm of x4] (x5){$x_5$};
  \node[black, black_node, right = 9mm of x5] (x6){$x_6$};
   \node[black, black_node, below = 11mm of x3] (x7){$x_7$};
  \node[black, black_node, below = 11mm of x6] (x8){$x_8$};
  \node[blue, blue_node, below = 8mm of x7] (y1){$y_1$};
  \node[blue, blue_node, below = 8mm of x8] (y2){$y_2$};
  % x3 definitions
  \node[black, above left = 0.5mm and -6mm of x3] () {$x_3\in [1.8,$\textcolor{blue}{$1.933^\dagger$}$]$};
 \node[black, above left = 3.5mm and -6mm of x3] () {$x_3 \leq x_1+2.8$};
  \node[black, above left = 6.5 mm and -6mm of x3] () {$x_3 \geq x_1+2.8$};
  % x6 def
    \node[black, above right = 0.5mm and -6mm of x6] () {$x_6\in [$\textcolor{blue}{$0.6^\dagger$}$, 1]$};
 \node[black, above right = 3.5mm and -6mm of x6] () {$x_6 \leq -x_2$};
  \node[black, above right = 6.5 mm and -6mm of x6] () {$x_6 \geq -x_2$};
  % x4 def
  \node[black, below left = 0mm and -5.5mm of x4] () {$[-1,$\textcolor{blue}{$-0.867^\dagger$}$]$};
    \node[black, below left = 3mm and 1mm of x4] () {$x_4 \leq x_1$};
    \node[black, below left = 5.5mm and 1mm of x4] () {$x_4 \geq x_1$};
  % x5def
    \node[black, below right = 0mm and -4.5mm of x5] () {$[-1,$\textcolor{blue}{$-0.6^\dagger$}$]$};
    \node[black, below right = 3mm and 1mm of x5] () {$x_5 \leq x_2$};
    \node[black, below right = 5.5mm and 1mm of x5] () {$x_5 \geq x_2$};
  % 7def {$x_3+x_5$};
  \node[black, below left = -0.5mm and 1mm of x7] () {$x_7\in [0.8,$\textcolor{blue}{$1.333^\dagger$}$]$};
  \node[black, below left = -3.5mm and 1mm of x7] () {$x_7\leq x_3 + x_5$};
 \node[black, below left = -6.5mm and 1mm of x7] () {$x_7\geq x_3 + x_5$};
 % x8def {$x_4+x_6$};
 \node[black, below right = -1mm and 0.5mm of x8] () {$x_8\in [$\textcolor{red}{$-0.4^*, 0.133^*$}$]$};
  \node[black, below right = -3.5mm and 0.5mm of x8] () {$x_8\leq x_4+x_6$};
 \node[black, below right = -6.5mm and 0.5mm of x8] () {$x_8\geq x_4+x_6$};
 % y1def
  \node[black, right = -0.5mm of y1] () {$y_1\in [0.8,$\textcolor{blue}{$1.333^\dagger$}$]$};
  \node[black, above right = -1.5mm and 0.5mm of y1] () {$y_1\leq x_7$};
 \node[black, above right = 1.5mm and 0.5mm of y1] () {$y_1\geq x_7$};
  % y2def
  \node[black, left = -0.5mm of y2] () {$y_2\in [0, 2]$};
  \node[black, above left = -1.3mm and 0.5mm of y2] () {$y_2\leq$\textcolor{blue}{$0.25 x_8 + 0.1^\dagger$}};
 \node[black, above left = 1.5mm and 0.5mm of y2] () {$y_2\geq 0$};
  % edges
 \draw [->,black,thin](input1) -- (x1);
  \draw [->,black,thin](input2) -- (x2);
  \draw [->,black,thin](x1) -- (x3);
  \draw [->,black,thin](x1) -- (x4);
  \draw [->,black,thin](x1) -- (x5);
  \draw [->,black,thin](x1) -- (x6);
  \draw [->,black,thin](x2) -- (x3);
  \draw [->,black,thin](x2) -- (x4);
  \draw [->,black,thin](x2) -- (x5);
  \draw [->,black,thin](x2) -- (x6);
  \draw [->,black,thin](x3) -- (x7);
  \draw [->,black,thin](x5) -- (x7);
  \draw [->,black,thin](x4) -- (x8);
  \draw [->,black,thin](x6) -- (x8);
  \draw [->,black,thin](x7) -- (y1);
  \draw [->,black,thin](x8) -- (y2);
\end{tikzpicture}
\vspace{-0.5em}
\caption{The updated abstract values (in {\color{blue} blue} marked by {\color{blue} $^\dagger$})}
\label{fig:secondAI}
\end{figure}

In summary, our system uses LP solving and abstract interpretation to eliminate adversarial labels that are actually infeasible.
% 
Note that in our constraint set, we explicitly encode an Add layer that takes two predecessors as $[x_7, x_8] = [x_3, x_4] + [x_5,x_6]$, and we will elaborate on how our theorem handles two predecessors at \autoref{theorem} instead of simply concatenating two predecessors into one in an engineering manner.
% 
% 
\section{Methodologies}
We provide a simplified case that only contains one adversarial label $y_2$ in the previous section.
% 
But in general, the verification process repeatedly selects multiple adversarial labels and attempts to eliminate them through iterations of refinements as illustrated in \autoref{fig:iteration}. 
% 
\begin{figure}[!ht]
\scriptsize
\centering
\begin{tikzpicture}[->, >=stealth']
\node[state] (Cset) 
{
\begin{tabular}{c}
\textbf{The Constraint Set $\Pi$:}\\
$adv_1 \vee \cdots \vee adv_\delta$\\
$\wedge$\\
 network abstraction \\
 $\wedge$\\
 \wrelu{} constraints
\end{tabular}
};

\node[node distance = 1.6cm, above of= Cset] (eliminated) 
{
\color{green}{\textbf{Eliminated}}
};

\node[state, node distance = 2.2cm, below of= Cset] (Solver) 
{
\begin{tabular}{c}
\textbf{Tailored LP Solver:}\\
max/min inputs or\\ 
unstable ReLUs\\
based on set $\Pi$
\end{tabular}
};

\node (iteration) at (2.5,-1)
{\scalebox{2}{$\circlearrowleft$}};

\node (inconclusive) at (2.1,2.5)
{\color{orange}{\textbf{Inconclusive}}};

\node[state, node distance = 1.75cm, below of= Solver] (Checker) 
{
\begin{tabular}{c}
\textbf{Violation Checker:}\\
check the batch of \\ 
solutions of LP solving
\end{tabular}
};

\node[node distance = 5.2cm, right of= Checker] (falsified) 
{
\color{red}{\textbf{Falsified}}
};

\node[state, node distance = 4.4cm, right of= Solver, xshift = 0cm] (Refine) 
{
\begin{tabular}{c}
\textbf{Refined Abstraction:}\\
\color{blue} tighter bounds \\
$\downarrow$ \\
\color{blue} update network abstraction 
\end{tabular}
};

\node[rectangle, draw = black, thick, dashed, minimum width=8.5cm, minimum height=6.5cm] () at (2.25,-1.4) {};

\path[-] (Refine) edge[] (4.4,0);
\path[->]
(Cset) edge[] node[xshift=0.6cm] {feasible} (Solver)
(Solver) edge[] node[xshift=0.5cm] {return} (Checker)
(Solver) edge[] node[yshift=0.2cm] {return} (Refine)
(4.4,0) edge[] node[yshift=0.2cm] {convey} (Cset)
(Checker) edge[] node[yshift=0.2cm] {counterexample found} (falsified)
(Cset) edge[] node[xshift=0.7cm] {infeasible} (eliminated)
(2.1,1.85) edge[] node[xshift=2.1cm] {exceed time/iteration thresholds} (inconclusive);
\end{tikzpicture}
\caption{The iterative process of abstract refinement}
\label{fig:iteration}
\vspace{-2em}
\end{figure}
% 
In each iteration, we take the encoding of multiple adversarial labels (the disjunction is handled by following the convention in \cite{DBLP:conf/vmcai/ZhongTK23}), the current network abstraction, plus the SOTA \emph{\wrelu{} multi-neuron constraints} as the constraint set.
% 
We eliminate $\delta$ spurious adversarial labels if the constraint set is infeasible, and eliminating all adversarial labels results in successful verification. 
% 
If the constraint set is feasible, we send it to our \emph{tailored LP solver on GPU} (details deferred till \autoref{sec:theorem}) and resolve neuron bounds to obtain a refined abstraction, where the refined abstraction is used in the next iteration.
% 
Furthermore, as a feasible constraint set indicates the possibility of a property violation, we collect \emph{the batch of input neuron assignments} during each solving substep and pass them to the model to check if they constitute an adversarial example which falsifies the property.
% 
We repeat this process until a conclusive result is obtained, or until the time/iteration threshold has been exceeded, indicating an inconclusive result.


\subsection{GPU-aided Linear Programming Solver}
\label{sec:theorem}
This subsection presents our theorem of transforming a constrained linear programming problem into an unconstrained solving problem {\em amenable to GPU acceleration}.

\textbf{Preliminaries.} Given a network with $L+1$ layers, each identified by a layer index, the \emph{input layer} is at index $0$ and the output layer is at index $L$.
% 
We denote the set of all ReLU layer indexes as $[R]$, the set of all linear layer indexes with one connected preceding layer as $[L_1]$, the set of all indexes of linear layers that take two preceding layers as $[L_2]$.
% 
We assume that $[R] \cup [L_1] \cup [L_2] = [1, \dots, L]$ and both $1, L \in [L_1]$.
% 
The output and input/preceding layer of a ReLU layer are respectively represented by $\hat{x}^{(i)}$ and $\hat{x}_p^{(i)}$, for $i \in [R]$.
% 
Given a neuron index $j$ and a layer index $i$, $\hat{x}^{(i)j}$ represents the $j$-th neuron at the $i$-th layer and $\hat{x}_p^{(i)j}$ refers to its input neuron.
% 
Symbol $x^{(i)}, i \in [L_1]\cup [L_2]$ represents the output of a linear layer; symbols $\hat{x}^{(0)}, x^{(0)}$ both denote the input layer.
% 
Symbol $x_p^{(i)}$ refers to the predecessor of layer $x^{(i)}$ for $i \in [L_1]$, whereas $x_{p_1}^{(i)}, x_{p_2}^{(i)}$ are the two preceding layers of layer $x^{(i)}$ for $i \in [L_2]$.
% 
Finally, we designate $S(i)$ as a set that includes the indexes of all connected succeeding layers of layer $i$ and $i_s \in S(i)$; the set $S^2(i)=\cup_{i_s \in S(i)} S(i_s)$, which includes the successors' indexes of succeeding layers of layer $i$ and $i_{s^2} \in S^2(i)$.
% \vspace{-1em}
\begin{theorem}
\label{theorem}
The constrained optimization problem in neural network verification (as shown in \autoref{eq:oriFormulate}) can be transformed into an unconstrained problem in \autoref{eq:oriFormulate2} by using Lagrangian dual.
\end{theorem}
\begin{proof}
The derivation can be found in \textcolor{blue}{\href{https://github.com/Grena-verifier/misc-files/blob/master/theorem_proof.pdf}{this appendix}}.\footnote{\url{https://github.com/Grena-verifier/misc-files/blob/master/theorem_proof.pdf}}
\end{proof}
In detail, the constrained problem formulation is given as:
% 
\begin{equation}\begin{gathered}
\label{eq:oriFormulate}
\min_{x, \hat{x}} c^{(0)T} \hat{x}^{(0)} + \sum_{i \in [R]} c^{(i)T} \hat{x}_p^{(i)}\\
% 
\st{} l^{(0)} \leq \hat{x}^{(0)} \leq u^{(0)};  H x^{(L)} + d \leq 0 \\
% 
x^{(i)} = W^{(i)} x_p^{(i)} + b^{(i)}, \for{} i \in [L_1] \\
% 
x^{(i)} = x_{p_1}^{(i)} + x_{p_2}^{(i)}, \for{} i \in [L_2] \\
% 
\hat{x}^{(i)j} = \hat{x}_p^{(i)j}, \for{} i \in [R], j \in I^{+(i)} \\
% 
\hat{x}^{(i)j} = 0, \for{} i \in [R], j \in I^{-(i)} \\
%
\hat{x}^{(i)j} \geq 0, \hat{x}^{(i)j} \geq \hat{x}_p^{(i)j}, \for{} i \in [R], j \in I^{\pm(i)} \\
% 
\hat{x}^{(i)j} \leq \frac{u^{(i)j}}{u^{(i)j}-l^{(i)j}}(\hat{x}_p^{(i)j} - l^{(i)j}), \for{} i \in [R], j \in I^{\pm(i)} \\
% 
P^{(i)}\hat{x}_p^{(i)} + \hat{P}^{(i)}\hat{x}^{(i)} - p^{(i)} \leq 0, \for i \in [R]
\end{gathered}\end{equation}
% 
In detail, $l^{(0)}, u^{(0)}$ record the lower and upper bounds of input neurons; $Hx^{(L)} + d \leq 0$ represents the output constraints that encode the existence of multiple adversarial examples.
% 
For ReLU neurons, their functionalities depend on the stability statuses.
% 
For example, suppose a linear layer $i$ is followed by a ReLU layer $i_s$.
% 
A ReLU neuron is stably activated if it takes a non-negative input interval, in which case it equals the input neuron, and we collect the indexes of those non-negative input neurons at layer $i$ as $I^{+(i)}$.
% 
Stably deactivated ReLU neurons have non-positive inputs, with outputs that are always evaluated to 0, and we denote the indexes of those non-positive input neurons as a set $I^{-(i)}$.
% 
Unstable ReLU neurons take both positive and negative input values; their corresponding input neuron indexes are recorded in $I^{\pm(i)}$.
% 
In particular, the unstable ReLU neuron is approximated by an orange-colored triangle shape as \autoref{fig:reluapprox} illustrates, where $l^{(i)j}, u^{(i)j}$ record its input interval and $\frac{u^{(i)j}}{u^{(i)j}-l^{(i)j}}$ is abbreviated as $s^{(i)j}$.

Constraints $P^{(i)}\hat{x}_p^{(i)} + \hat{P}^{(i)}\hat{x}^{(i)} - p^{(i)} \leq 0$ capture the dependencies of multiple ReLU neurons in the same layer; they are obtained from the \wrelu{} method~\cite{DBLP:journals/pacmpl/MaLB24} to improve solving precision.
% 
The coefficients $c^{(0)}$ and $c^{(i)}, i \in [R]$ are used to control the objective function. 
% 
As we aim to resolve the input neurons as well as the input lower and upper bounds of unstable ReLU neurons to refine the abstraction, we only set one element among $c^{(0)}, c^{(i)}, i \in [R]$ as $1$ (for lower bound computation) or $-1$ (for upper bound) for the respective neuron; the rest of the elements are set as $0$.

Eventually, we transform the constrained solving problem into an unconstrained one using Lagrangian variables as shown below, where we annotate $[x]_+ = \max{(x,0)}, [x]_{-} = -\min{(x,0)}$:

\begin{equation}\begin{gathered} 
\label{eq:oriFormulate2}
\max_{\gamma, v, \pi, \alpha} l^{(0)}[c^{(0)T} - v^{(1)T} W^{(1)}]_+  - u^{(0)}[v^{(1)T} W^{(1)} - c^{(0)T}]_+ + \gamma^T d\\
 + \sum_{i \in [R]} \sum_{j \in I^{\pm(i)}} [\hat{v}^{(i)j}]_+\cdot s^{(i)j} \cdot l^{(i)j} 
 % 
- \sum_{i \in [R]} \pi^{(i)T} p^{(i)}
% 
- \sum_{i \in [L_1]} v^{(i)T} b^{(i)}\\
% 
\st{} v^{(L)} = - H^{T} \gamma; \ \gamma,\pi \geq 0; \ \alpha \in [0,1]\\
% 
\for{} i \in [L_1] \cup [L_2], i_s \in [R] \cap S(i) , i_s \notin [L_2]: \\
% 
v^{(i)j} = -c^{(i_s)j}, j \in I^{-(i)}\\
% 
v^{(i)j} = \sum_{i_{s^2} \in S(i_s) \cap [L_1]} v^{(i_{s^2})T} W^{(i_{s^2})}_{:,j} - c^{(i_s)j}, j \in I^{+(i)}\\
% 
\for{} j \in I^{\pm(i)}: v^{(i)j} = s^{(i_s)j} [\hat{v}^{(i_s)j}]_+ - c^{(i_s)j} 
% 
- \pi^{(i_s)T} P^{(i_s)}_{:,j} - \alpha^{(i_s)j} [\hat{v}^{(i_s)j}]_{-}\\
% 
\hat{v}^{(i)j} = \sum_{i_{s^2} \in S(i_s) \cap [L_1]} v^{(i_{s^2})T} W^{(i_{s^2})}_{:,j} - \pi^{(i_s)T} \hat{P}^{(i_s)}_{:,j}\\
% 
\for{} i \in [L_1] \tand{} i_s \in [L_2] \cap S(i) \tand{} i_s \notin [R]:\\
% 
v^{(i)} = v^{(i_s)} 
\end{gathered}\end{equation}
% 
Any valid setting of $\gamma,\pi \geq 0; \alpha \in [0,1]$ leads to a safe lower bound of the original problem.
% 
Based on the values of $\gamma,\pi, \alpha$, we compute the values of $v^{(i)}$ and $\hat{v}^{(i)}$ in reverse order from $v^{(L)}$ to $v^{(0)}$.
% 
Using all assignments of variables, we could compute the objective value.
% 
In practice, the solving process starts with a valid initialization of $\gamma,\pi, \alpha$, then we optimize these variables using gradient information.


\begin{figure}[!ht]
  \centering
  \begin{tikzpicture}[scale=1]
    % Draw axes
    \draw [->,thick] (0,0) -- (0,1.6) node (yaxis) [above] {$\hat{x}^{(i)j}$};
    \draw [->,thick] (-2.5,0) -- (2.5,0) node (xaxis) [right] {$\hat{x}_p^{(i)j}$};
    % Draw lines
    \draw [thick]  (0,0) -- (1.2, 1.2) node (p1) [right] {$\hat{x}^{(i)j}=\max(0,\hat{x}_p^{(i)j})$};
    \draw [thick] (0,0) -- (-1, 0) node (p2) [below] {$l^{(i)j}$};
    \draw [dashed] (1.2, 1.2) -- (1.2,0) node (p3) [below] {$u^{(i)j}$};
    \draw[fill=orange,opacity=0.4] (0,0) -- (1.2, 1.2) -- (-1,0) -- cycle;
    % Fill the dot
    \fill[black] (1.2, 1.2) circle (1.5pt);
    \fill[black] (-1, 0) circle (1.5pt);
  \end{tikzpicture}
  \caption{The approximation of a ReLU neuron}
  \label{fig:reluapprox}
\end{figure}

\begin{algorithm}
\begin{flushleft}
\textbf{Input:}
\vspace{-1em}
\begin{itemize}
\item $M$: neural network model
\item $\mathcal{L}_L$: list of old lower bounds for all ReLU and input layers
\item $\mathcal{L}_U$: list of old upper bounds for all ReLU and input layers 
\item $\Pi$: output constraints
\item $\Theta$: WraLU constraints
\end{itemize}
\textbf{Output:} improved lower and upper bounds
\end{flushleft}
\begin{algorithmic}[1]
   \State $\mathcal{S} \gets \Call{create\_solver\_model}{M, \mathcal{L}_L, \mathcal{L}_U, \Pi, \Theta}$
   \State $list\_new\_L, list\_new\_U \gets [], []$ \Comment{initialization}
   \For{$i$ \textbf{in} $\Call{range}{\text{len}(\mathcal{L}_L)}$} \Comment{solve for each layer}
       \State $\mathcal{S} .\Call{set\_layer}{i}$ \Comment{reset to solve for this layer}
       \State $\mathcal{S} .\Call{initialize\_lagrangian\_vars}{ }$
       \State $max\_obj \gets \Call{train\_until\_convergence}{\mathcal{S}}$ 
       \State $N_L, N_U \gets \Call{get\_new\_bounds}{\mathcal{L}_L[i], \mathcal{L}_U[i], max\_obj}$ 
       \Statex \Comment{improve old bounds based on solved values}
       \State $list\_new\_L.\Call{append}{N_L}$ \Comment{record updated bounds}
       \State $list\_new\_U.\Call{append}{N_U}$
   \EndFor
   \State \Return $list\_new\_L, list\_new\_U$
\end{algorithmic}
\caption{Bounds tightening procedure}
\label{algo:lpsolve}
\end{algorithm}

Algorithm~\ref{algo:lpsolve} shows the process of solving tighter bounds for each layer by training Lagrangian variables.
% 
While Lagrangian multipliers are commonly used in prior works \cite{DBLP:journals/corr/abs-2103-06624,DBLP:conf/nips/XuS0WCHKLH20,DBLP:conf/iclr/FerrariMJV22,DBLP:conf/nips/KothaBKD023}, to the best of our knowledge, our method is the first to apply them to spurious-adversarial-label-guided refinement.
% 
Furthermore, we incorporate multi-neuron constraints, output constraints and $L_2$ layer constraints that explicitly consider two preceding layers, which enhances the theoretical rigor of residual network verification.
% 
% 
\section{Experiments}
% 
We compare the performance of our prototypical verifier \grena{} with SOTA verifiers including the incomplete tool \wrelu{}~\cite{DBLP:journals/pacmpl/MaLB24} and the complete tool \crown{}~\cite{AlphaBetaCrownSystem}, the winner of VNNCOMP (International Verification of Neural Networks Competition).
% 
In addition, we compare our tailored LP solver with SOTA \gurobi{} with respect to returned bound tightness and execution time.
% 
\subsection{Experiment Setup}
% 
The dataset includes MNIST (denoted as `M') \cite{DBLP:journals/spm/Deng12} and CIFAR10 (shortened as `C') \cite{cifar10dataset}. 
% 
We test fully-connected (denoted as `FC'), convolutional (`Conv') and residual (`Res') networks of various sizes that are obtained from the ERAN system \cite{ERANSystem} and VNNCOMP \cite{VNNCOMP22}.
% 
The number of intermediate layers (\#Layers), the number of intermediate neurons (\#Neurons), and the trained defense are enumerated in \autoref{tab:netdetails} (a trained defense is a defense method against adversarial examples to improve robustness of networks).
% 
\begin{table}[!ht]
\caption{Detailed information of the experimental networks}
  \centering
  \def\arraystretch{1.3}
   \addtolength{\tabcolsep}{0.35em}
  \begin{tabular}{|l|c|c|c|c|c|}
    \hline
    \textbf{Network} &
    \textbf{Type} &
    \textbf{$\epsilon$} &
    \textbf{\#Layers} &
    \textbf{\#Neurons} &
    \textbf{Defense}
    \\
    \hline
    M\_6x256 & FC & 0.033 & 6 & 1,010 &  None \\
    \hline
    M\_ConvSmall & Conv &  0.11 & 3 & 3,604 & None \\
    \hline
     M\_ConvMed & Conv & 0.1 & 3 & 5,704 &  None\\
    \hline
    M\_ConvBig & Conv & 0.313 & 6 & 48,064 &  DiffAI\cite{DBLP:conf/icml/MirmanGV18} \\
    \hline
    C\_ConvMed & Conv  & 0.006 & 3 & 7,144 & PGD\cite{DBLP:conf/iclr/MadryMSTV18} \\
    \hline
    C\_ConvBig & Conv  & 0.0078 & 6 & 62,464 & DiffAI \\
    \hline
    C\_Resnet4b & Res  & 0.0042 & 10 & 14,436 &  None \\
    \hline
    C\_ResnetA & Res  & 0.0033 &8 & 11,364 &   None  \\
    \hline
    C\_ResnetB & Res  & 0.012 &8 & 11,364 &   None  \\
    \hline
  \end{tabular}
  \label{tab:netdetails}
  \vspace{-1em}
\end{table}

\subsection{Comparison with SOTA Verifiers}
% 
To test the verification performance of \grena{}, we select 30 images from the datasets for each network to verify robustness and compare the results and time costs.
% 
To verify robustness, we choose a perturbation parameter $\epsilon$ for each tested network as indicated in \autoref{tab:netdetails} and apply the perturbation to each image.
% 
We check if all the ``perturbed'' images within $\epsilon$ will be classified the same as the original image by the networks, as the perturbation is imperceptible to human eyes.
% 
If so, we conclude the robustness to be verified.
% 
Otherwise, if a counterexample with a different label is detected, we falsify the robustness property.
% 
If the analysis is inconclusive, we return {\em unknown} (abbreviated as `\#Unk') to the user.

The verification results of each tool and average execution time per image are shown in \autoref{tab:verifyResult}.
% 
\begin{table}[!ht]
\caption{The verification results of \wrelu{}, \crown{} and our system \grena{} with average execution time per image}
  \centering
  \def\arraystretch{1.2}
  \addtolength{\tabcolsep}{0.2em}
  \begin{tabular}{|c|c|c|c|c|c|}
    \hline
    \multirow{2}{*}{\textbf{Network}} &
    \multirow{2}{*}{\textbf{Methods}} &
    \multicolumn{4}{c|}{
      \makecell{Verification results}}\\
    \cline{3-6}
    & &
    \#Unk & \#Verify & \#Falsify & Time(s) \\
    \hline
% 
    \multirow{3}{*}{M\_6x256} &
    \wrelu{} & 27 & 3 & 0 &  26.6 \\
    \cline{2-6}
     & \crown{}  & 8 & \textbf{12} & \textbf{10} & 87.9 \\
    \cline{2-6}
    &  GRENA  & 15 & 7  & 8 & 195.5 \\
    \Xhline{0.8px} 
% 
    \multirow{3}{*}{M\_ConvSmall} &
    \wrelu{} & 17 & 13 & 0 & 7.2  \\
    \cline{2-6}
     & \crown{} & 0 & \textbf{26} & \textbf{4} & 5.8 \\
    \cline{2-6}
    &  GRENA  & 0 & \textbf{26}  & \textbf{4} & 35.7 \\
    \Xhline{0.8px}
% 
    \multirow{3}{*}{M\_ConvMed} &
    \wrelu{} & 15 & 15 &  0 & 18.0  \\
    \cline{2-6}
     & \crown{}  & 2 & 22 & \textbf{6} & 28.6 \\
    \cline{2-6}
    &  GRENA  & 0 & \textbf{24} & \textbf{6} & 42.9 \\
    \Xhline{0.8px}
    % 
    \multirow{3}{*}{M\_ConvBig} &
    \wrelu{} & 10 & \textbf{20} & 0 &  35.6 \\
    \cline{2-6}
     & \crown{} & 3 & \textbf{20} & 7 & 31.8 \\
    \cline{2-6}
    &  GRENA  & 2 &  \textbf{20} & \textbf{8} & 77.0 \\
    \Xhline{0.8px}
    % 
    \multirow{3}{*}{C\_ConvMed} &
    \wrelu{} & 19 & 11 & 0 & 60.8  \\
    \cline{2-6}
     & \crown{} & 8 & 17 & 5 & 84.8 \\
    \cline{2-6}
    &  GRENA  & 0 & \textbf{23}  & \textbf{7} & 119.9 \\
    \Xhline{0.8px}
    % 
    \multirow{3}{*}{C\_ConvBig} &
    \wrelu{} & 0 & \textbf{30} & 0 &  32.7 \\
    \cline{2-6}
     & \crown{}  & 2 & 28 & 0 & 25.1 \\
    \cline{2-6}
    &  GRENA  & 0 & \textbf{30} & 0& 58.0 \\
    \Xhline{0.8px}
    % 
    \multirow{3}{*}{C\_Resnet4b} &
    \wrelu{} &  7& 23 & 0 &  49.7 \\
    \cline{2-6}
     & \crown{} &  0 & \textbf{26} & \textbf{4} & 6.3 \\
    \cline{2-6}
    &  GRENA  & 0 & \textbf{26} & \textbf{4} & 87.8 \\
    \Xhline{0.8px}
    % 
    \multirow{3}{*}{C\_ResnetA} &
    \wrelu{} & - & - & - &  - \\
    \cline{2-6}
     & \crown{} & 0 & \textbf{27} & \textbf{3} & 10.2 \\
    \cline{2-6}
    &  GRENA  & 0 &  \textbf{27} & \textbf{3} & 77.6 \\
    \Xhline{0.8px}
    % 
    \multirow{3}{*}{C\_ResnetB} &
    \wrelu{} & - & - & - & -  \\
    \cline{2-6}
     & \crown{} & 8 & 16 & \textbf{6} & 100.2 \\
    \cline{2-6}
    &  GRENA  & 1 & \textbf{23}  & \textbf{6} & 191.5 \\
    \hline
  \end{tabular}
  \label{tab:verifyResult}
  \vspace{-2em}
\end{table}
% 
We can observe that we \emph{outperform both \wrelu{} and \crown{}} with respect to precision as we return more conclusive results (either verified or falsified).
% 
In particular, we return conclusive results for \textbf{137} more images than \wrelu{} (\textbf{50.7\%} of all 270 tested images), while \wrelu{} fails to handle two residual networks.
% num: 137
% 
Even compared with the complete tool \crown{}, our tool produces \textbf{13} more conclusive images in total and achieves \textbf{better or the same} verification/falsification precision on most networks.
% 
The empirical results demonstrate \emph{the strong verification efficacy of our system}.
% \khoo{{\em Question: can put this as one of the contributions?}}


\subsection{Comparison with \gurobi{}}
% 
We now compare the bound-solving abilities of our tailored solver to those of \gurobi{} in the context of neural network encoding.
% 
We select one image for each network and collect all the constraints; we then use this constraint set to solve all unstable neuron bounds and input bounds with both our solver and \gurobi{}, and compare the tightness of the solved bounds as visualized in \autoref{fig:boundcomp}.

\autoref{fig:boundcomp} depicts log-scale histograms of bound improvements for both \gurobi{} and our tailored solver, where ``improvement'' is defined as the original neuron bound minus the new neuron interval returned by the two solvers.
% 
The bar heights represent the number of neurons with improvements at the magnitude indicated on the x-axis. 
% 
Figures~\ref{fig:mfc}, \ref{fig:cr4b}, \ref{fig:cra} and~\ref{fig:crb} show significant overlap between the orange and blue bars, meaning our tailored solver achieved \textbf{comparable} improvements to \gurobi{}.
% 
It is noteworthy that the average solving time for \gurobi{} was 35503.32 seconds, while our GPU-accelerated solver took only 47.38 seconds, achieving an impressive \textbf{749$\times$} speedup.
% 
More results and details can be found in \textcolor{blue}{\href{https://github.com/Grena-verifier/Grena-verifier}{our GitHub repo}}.
% 
\begin{figure}[!ht]
\centering
  \begin{minipage}{0.96\linewidth}
    \begin{subfigure}[b]{0.49\textwidth}
      \centering
      \includegraphics[width=\textwidth]{M6x256_bounds_histogram_unseeded.png}
      \vspace{-1.6em}
      \caption{}%
      \label{fig:mfc}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.49\textwidth}
      \centering
       \includegraphics[width=\textwidth]{MConvSmall_bounds_histogram_unseeded.png}
      \vspace{-1.6em}
      \caption{}%
      \label{fig:mcs}
    \end{subfigure}
    % \vspace{-0.5em}
  \end{minipage}

  \begin{minipage}{0.96\linewidth}
    \begin{subfigure}[b]{0.49\textwidth}
      \centering
      \includegraphics[width=\textwidth]{MConvMed_bounds_histogram_unseeded.png}
      \vspace{-1.6em}
      \caption{}%
      \label{fig:mcm}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.49\textwidth}
      \centering
       \includegraphics[width=\textwidth]{CResNet4B_bounds_histogram_unseeded.png}
      \vspace{-1.6em}
      \caption{}%
      \label{fig:cr4b}
    \end{subfigure}
    % \vspace{-0.5em}
  \end{minipage}

  \begin{minipage}{0.96\linewidth}
    \begin{subfigure}[b]{0.49\textwidth}
      \centering
      \includegraphics[width=\textwidth]{CResNetA_bounds_histogram_unseeded.png}
      \vspace{-1.6em}
      \caption{}%
      \label{fig:cra}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.49\textwidth}
      \centering
       \includegraphics[width=\textwidth]{CResNetB_bounds_histogram_unseeded.png}
      \vspace{-1.6em}
      \caption{}%
      \label{fig:crb}
    \end{subfigure}
    % \vspace{-0.5em}
  \end{minipage}
  \caption{Bound improvement comparison between our solver and \gurobi{}}
  \label{fig:boundcomp}
\end{figure}

In conclusion, our tailored LP solver \emph{obtained bound improvements comparable} to those of \gurobi{} while \emph{significantly reducing} the solving time.

\section{Related Works}
\label{sec:relatedwork}
Generally speaking, verification of deep neural networks is an NP-hard problem \cite{DBLP:journals/corr/KatzBDJK17}.
% 
Therefore, there are a series of incomplete verification methods that sacrifice completeness.
% 
Representative works include those based on abstract interpretation~\cite{DBLP:conf/sp/GehrMDTCV18,DBLP:conf/nips/SinghGMPV18,DBLP:journals/pacmpl/SinghGPV19,DBLP:conf/icml/MirmanGV18} or on bound propagation~\cite{DBLP:journals/corr/abs-2007-10868,DBLP:conf/iccv/GowalDSBQUAMK19,DBLP:conf/icml/WengZCSHDBD18,DBLP:conf/nips/ZhangWCHD18,DBLP:conf/nips/WangPWYJ18}.
% 
To mitigate the precision loss of incomplete methods, researchers have been relying on LP or MILP to encode the network more tightly.
% 
For example, the DeepSRGR~\cite{DBLP:conf/tacas/YangLLHWSXZ21}, \arena{}~\cite{DBLP:conf/vmcai/ZhongTK23} and PRIMA~\cite{DBLP:journals/pacmpl/MullerMSPV22} systems invoke the \gurobi{} solver to solve LP problems and obtain tighter neuron intervals.
% 
However, the usage of an off-the-shelf solver on the CPU fails to leverage the nature of neural network encoding.

Inspired by works aiming to migrate the verification of neural networks to GPUs with the help of Lagrangian dual problems \cite{DBLP:journals/corr/abs-2103-06624,DBLP:conf/iclr/FerrariMJV22,DBLP:conf/nips/KothaBKD023}, we propose our tailored LP solver on GPU that benefits our LP formulation.
% 
Note that previous works \cite{DBLP:journals/corr/abs-2103-06624,DBLP:conf/iclr/FerrariMJV22,DBLP:conf/nips/KothaBKD023} only encode one-predecessor cases where the multiple predecessors would be concatenated into one.
% 
Although this could be handled by other engineering approaches, it lacks rigorous theoretical derivation.
% 
On the contrary, we explicitly encode multi-predecessor cases in our formulation.
% 
Furthermore, the works~\cite{DBLP:journals/corr/abs-2103-06624,DBLP:conf/iclr/FerrariMJV22} only consider intermediate neuron constraints and~\cite{DBLP:conf/nips/KothaBKD023} only includes output constraints in its constraint set, while our formulation captures both intermediate and output constraints.
% 
Lastly, to our knowledge, our method is the first to effectively deploy the Lagrangian dual problem in the spurious-adversarial-label-guided refinement process.

\section{Conclusion}
\label{sec:conclusion}
In this paper, we propose a theorem to solve LP problems on GPUs in the context of neural network verification.
% 
To the best of our knowledge, our work is the first to use the Lagrangian dual in the spurious-adversarial-label-guided refinement process and to encode complex network constraints that take more than one predecessor, which enhances the scientific rigor of the verification of residual networks.
% 
We implemented our solving theorem in a GPU-based tailored solver; our empirical study strongly indicates that our tailored solver could return comparable solved values compared to \gurobi{} while obtaining significant speed gains.
% 
Furthermore, it enables our verifier \grena{} to return more conclusive results than SOTA verifiers within a reasonable amount of time, demonstrating the strong efficacy of our system.

\begin{credits}
\subsubsection{\ackname} This research is supported by a Singapore Ministry of Education Academic Research Fund Tier 1 T1-251RES2103. 

\subsubsection{\discintname} The authors have no competing interests to declare that are relevant to the content of this article.
\end{credits}
%
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
% \bibliographystyle{splncs04}
% \bibliography{mybibliography}
%
\bibliographystyle{splncs04}
\bibliography{ref}
\end{document}
