\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{booktabs}
\usepackage{makecell}
\usepackage{comment}
\usepackage{multirow}
\usepackage{floatrow}
\usepackage{soul}
\usepackage{color}
\usepackage{xcolor}
\floatsetup[table]{capposition=top}
\newfloatcommand{capbtabbox}{table}[][]
\newcommand{\tomcomment}[1]{\textcolor{red}{\textit{[TOM: #1]}}}
\newcommand{\revised}[1]{\textcolor{red}{#1}}

% \jmlrvolume{-- Under Review}
% \jmlryear{2026}
% \jmlrworkshop{Full Paper -- MIDL 2026 submission}
% \editors{Under Review for MIDL 2026}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026}
\jmlrvolume{-- 191}
\editors{Accepted for publication at MIDL 2026}
\title[Clinical Risk-Aware Grading for Coronary Artery Stenosis]{Clinical Risk-Aware Multi-Level Grading for Coronary Artery Stenosis through Curved Feature Reconstruction}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Shishuang Zhao\midljointauthortext{Contributed equally}\nametag{$^{1}$}} \orcid{0000-0002-6311-3358} \Email{shishuang.zhao@yizhun-ai.com}\\
\Name{Hongtai Li\midlotherjointauthor\nametag{$^{2}$}} \Email{20201099@cmu.edu.cn}\\
\Name{Junjie Hou\nametag{$^{1}$}} \Email{junjie.hou@yizhun-ai.com}\\
\Name{Yuhang Liu\midljointauthortext{Corresponding Author}\nametag{$^{1}$}} \orcid{0000-0001-5687-184X} \Email{yuhang.liu@yizhun-ai.com}\\
\addr $^{1}$ Yizhun Medical AI Co., Ltd\AND
\addr $^{2}$ The First Hospital of China Medical University \AND
}

\begin{document}

\maketitle

\begin{abstract}
% Secondly, clinical risk relevance is significant in stenosis grading, while incorporating it into the algorithm is not trivial.
Developing a multi-level grading model for coronary artery stenosis holds great clinical significance for the diagnosis of coronary artery disease. However, designing an effective multi-level deep learning algorithm faces significant challenges. Specifically, utilizing CCTA or 3D SCPR images alone presents inherent shortcomings: CCTA images are difficult to analyze due to the tortuous paths of blood vessels, while 3D SCPR images are prone to abnormal distortions that hinder accurate grading. Furthermore, different stenosis grades are associated with varying clinical risks, and incorporating this association into the algorithm is non-trivial. To address the former problems, we propose the Curved Feature Reconstruction (CFR) module, which uses vessel curves as prior and employs a point-by-point correspondence strategy to precisely align and fuse features from both 3D SCPR and CCTA images. Meanwhile, a Clinical Risk-Aware (CR) Loss is employed to introduce clinical risk relevance into the network training so that the algorithm can better align with the clinical diagnosis. The experimental results on an in-house dataset reveal that our approach significantly outperforms other methods, and several ablation studies also demonstrate the effectiveness of our proposed designs.
\end{abstract}

\begin{keywords}
Multi-level stenosis grading, Deep learning, CCTA, 3D SCPR
\end{keywords}

\section{Introduction}

% This is where the content of your paper goes.  Some random
% notes\footnote{Random footnote are discouraged}:
% \begin{itemize}
% \item You should use \LaTeX \cite{Lamport:Book:1989}.
% \item JMLR/PMLR uses natbib for references. For simplicity, here, \verb|\cite|  defaults to
%   parenthetical citations, i.e. \verb|\citep|. You can of course also
%   use \verb|\citet| for textual citations.
% \item Eprints such as arXiv papers can of course be cited \cite{Hinton:arXiv:2015:Distilling}. We recomend using a \verb|@misc| bibtex entry for these as shown in the sample bibliography.
% \item You should follow the guidelines provided by the conference.
% \item Read through the JMLR template documentation for specific \LaTeX
%   usage questions.
% \item Note that the JMLR template provides many handy functionalities
% such as \verb|\figureref| to refer to a figure,
% e.g. \figureref{fig:example},  \verb|\tableref| to refer to a table,
% e.g. \tableref{tab:example} and \verb|\equationref| to refer to an equation,
% e.g. \equationref{eq:example}.
% \end{itemize}

% \begin{table}[htbp]
%  % The first argument is the label.
%  % The caption goes in the second argument, and the table contents
%  % go in the third argument.
% \floatconts
%   {tab:example}%
%   {\caption{An Example Table}}%
%   {\begin{tabular}{ll}
%   \bfseries Dataset & \bfseries Result\\
%   Data1 & 0.12345\\
%   Data2 & 0.67890\\
%   Data3 & 0.54321\\
%   Data4 & 0.09876
%   \end{tabular}}
% \end{table}

% \begin{figure}[htbp]
%  % Caption and label go in the first argument and the figure contents
%  % go in the second argument
% \floatconts
%   {fig:example}
%   {\caption{Example Image}}
%   {\includegraphics[width=0.5\linewidth]{example-image}}
% \end{figure}

% \begin{algorithm2e}
% \caption{Computing Net Activation}
% \label{alg:net}
%  % older versions of algorithm2e have \dontprintsemicolon instead
%  % of the following:
%  %\DontPrintSemicolon
%  % older versions of algorithm2e have \linesnumbered instead of the
%  % following:
%  %\LinesNumbered
% \KwIn{$x_1, \ldots, x_n, w_1, \ldots, w_n$}
% \KwOut{$y$, the net activation}
% $y\leftarrow 0$\;
% \For{$i\leftarrow 1$ \KwTo $n$}{
%   $y \leftarrow y + w_i*x_i$\;
% }
% \end{algorithm2e}

Coronary artery stenosis, which refers to the narrowing of coronary arteries, has been recognized as the primary indicator of coronary artery disease (CAD)\cite{jensen2020ischemic,naghavi2003vulnerable,otsuka2013napkin, 10635120,10.1007/978-3-031-19803-8_22,10.1007/978-3-031-43990-2_71,11094417}. 
Accurate grading of stenosis is crucial for the effective diagnosis of CAD through Coronary Computed Tomographic Angiography (CCTA), a widely used non-invasive diagnostic examination\cite{mowatt200864}. 
In clinical practice, stenosis can be graded on a 5-level scale, from 1 to 5, representing the increasing degrees of stenosis: minimal (1–24\%), mild (25–49\%), moderate (50–69\%), severe stenosis (70–99\%), and occluded vessel (100\%)\cite{cury2022cad}. 
% 
Different stenosis grades correspond to different heart health risks, which leads to various medical interventions and treatments. Therefore, achieving accurate multi-level stenosis grading is a significant step towards the development of an automatic computer-aided diagnosis system for CAD.



To date, several studies have focused on multi-level stenosis grading\cite{zreik2018recurrent,ma2021transformer,tejero2019texture,kiricsli2013standardized,kelm2011detection,coronaryrcnn}. However, a common issue with these works is the insufficient incorporation of prior clinical knowledge, as shown in \figureref{fig:intro}. \textbf{Firstly}, clinicians primarily analyze CCTA images to diagnose CAD, while previous studies\cite{zreik2018recurrent,ma2021transformer,coronaryrcnn} have relied on 3D Straightened Curved Planar Reformation (3D SCPR) images, which may impede the analysis process due to abnormal distortions caused by the reformation mechanism, although 3D SCPR images provide information on the 3D geometry of arteries. \textbf{Secondly}, clinicians have different medical treatments for different stenosis grades, and the observation of moderate or greater stenosis serves as a critical diagnostic criterion for CAD\cite{cury2022cad}. As the primary expert consensus document, CAD-RADS 2.0\cite{cury2022cad} provides a standardized framework for grading stenosis from 1 to 5. Crucially, it also states that patients with stenosis grade greater than 2 are at higher risk and may require further assessments and anti-anginal therapy, while those with grade 1 or 2 only need preventive pharmacotherapy\cite{cury2022cad}. Thus, a clinical risk boundary exists between grade 1/2 and grade 3/4/5, and misclassification on either side of the boundary can have serious consequences. However, previous studies did not consider the clinical risk relevance of stenosis grading. They either focused on crude binary classification\cite{zreik2018recurrent,ma2021transformer,tejero2019texture} or stenosis rate regression\cite{kiricsli2013standardized,kelm2011detection,coronaryrcnn}, treating different stenosis grades equally, which does not meet the demands of clinical practice. 
Additionally, accurately defining and acquiring stenosis rates is challenging due to the complex nature of the coronary lumen space.
\begin{figure}[t]
    \centering
    \includegraphics[width=12cm]{intro.pdf}
    \caption{
    Illustrations of the challenges and our solutions. In the left figure, identifying suspicious stenosis solely through 3D SCPR images is difficult due to abnormal distortions (indicated by colored arrows), while inclusion of CCTA images can provide more comprehensive information for stenosis grading. 
     In the right figure, we propose CR Loss to encode the important clinical risk boundary explicitly into the model training.} 
     
    \label{fig:intro}
\end{figure}

To address the shortcomings of previous methods, we propose a novel deep learning-based approach for achieving multi-level stenosis grading. Our approach consists of two key designs to tackle the aforementioned problems. Firstly, we introduce the \emph{Curved Feature Reconstruction} (CFR) module to address the limitations of 3D SCPR images. The CFR module integrates features from CCTA images with features from 3D SCPR images, using vessel curve priors to establish a connection between the two image types. By combining the undistorted CCTA images, which retain the original texture details around stenosis regions, with 3D SCPR images, a more comprehensive analysis of stenosis can be achieved. Secondly, to align network training with clinical practice, we propose a novel loss function called \emph{Clinical Risk-Aware Loss} (CR Loss). Specifically, we define the
CR Distance to quantify the dissimilarity between different grades. We treat stenosis grading as a classification task, with the CR Distance serving as the metric function for the encoding of the ground truth probability distribution, explicitly incorporating clinical relevance into the network.

Our contributions are three-fold. (1) We propose the CFR module, which enables comprehensive stenosis analysis from both 3D SCPR and CCTA images. (2) We introduce the CR Loss, a novel loss function that explicitly encodes the important clinical risk boundary into network training. (3) We introduce two new metrics, MCRE and AMCRE, which measure the clinical risk relevance of stenosis grading results. Our method significantly outperforms other approaches in these metrics, indicating its better alignment with clinical needs.



\section{Related Works}
Regarding model input, most previous studies solely use 3D SCPR images as input\cite{DBLP:conf/aaai/MaFZL0WQ0025, DBLP:conf/miccai/MaZFLLWWQGL24, ma2021transformer,zreik2018recurrent,coronaryrcnn,candemir2020automated}, which are prone to abnormal distortions and lack the original texture details. In terms of methodology, previous methods for stenosis grading can be divided into two categories: stenosis rate regression\cite{kiricsli2013standardized,kelm2011detection,shahzad2013automatic,coronaryrcnn} and binary classification\cite{zreik2018recurrent,ma2021transformer,denzinger2019coronary,tejero2019texture}. 
However, estimating vessel lumen accurately for stenosis rate calculation is a challenging task due to the complex nature of coronary lumen space, and the stenosis grade cannot be determined if the reference lumen is either non-existent or unreliable\cite{cury2022cad}. Moreover, crude binary classification is not sufficient to meet the clinical requirements. Therefore, none of these approaches fulfills the requirements of the multi-level grading mechanism in clinical diagnosis.
In this paper, we present the first deep learning-based multi-level stenosis grading method to facilitate the practical utilization.

\section{Method}
In this section, we will elaborate on the details of our method. 
Let $\mathbf{I} \in \mathbb{R}^{H \times W \times D}$, $\mathcal{S} = \{ \mathbf{p}_i \}_{i=1}^N$ and $\overline{\mathcal{S}} = \{ \mathbf{p}_i | \mathbf{p}_i \in \mathcal{S}\}_{i=s}^e$ denote the CCTA image, the centerline points, and stenosis instance respectively, where $\mathbf{p}_i \in \mathbb{R}^{3}$ is the 3D point location; $N$ is the number of centerline points; and $1 \leq s < e \leq N$.
In other words, a stenosis instance is a vessel segment consisting of several consecutive centerline points.
Given a CCTA image and a stenosis instance $\overline{\mathcal{S}}$, the goal of the stenosis grading task is to predict the grade $y \in \{1,2,3,4,5\}$ for $\overline{\mathcal{S}}$.

Our system pipeline (\figureref{fig:model}) can be divided into two parts:
1) the \emph{Curved Feature Reconstruction} module to extract features from 3D SCPR image and CCTA patch and fuse them to predict the stenosis grade (Section~\ref{method:cfr});
and 2) a novel \emph{Clinical Risk-Aware Loss} to incorporate clinical relevance into the network training (Section~\ref{method:crae}).
The following sections provide details of our method.

\subsection{Generation Methodology of 3D SCPR Images}
Given a CCTA image $\mathbf{I}$ and a sequence of uniformly sampled centerline coordinates $\mathcal{S} = \{ \mathbf{p}_i \}_{i=1}^N$ (which are pre-computed by external algorithms and are beyond the scope of this study), a 3D SCPR image can be generated\cite{scpr}. 

For each $i\in [1, N]$, we compute the unit tangent vector $\mathbf{T}_i$. Subsequently, we derive two mutually orthogonal unit vectors, the normal $\mathbf{M}_i$ and binormal $\mathbf{B}_i$, which define the cross-sectional plane perpendicular to the centerline path at $\mathbf{p}_i$. On each cross-sectional plane, $H_\text{s} \times W_\text{s}$ coordinates are sampled to form a planar grid, with $\mathbf{p}_i$ positioned precisely at the grid's center. By performing bilinear interpolation on these coordinates within the CCTA image $\mathbf{I}$, a 2D cross-sectional slice is obtained. After repeating this process for all $N$ points, the resulting slices are stacked sequentially to construct a 3D SCPR image $\mathbf{I}_{\text{scpr}} \in \mathbb{R}^{H_\text{s} \times W_\text{s} \times N}$. The code used to generate the 3D SCPR images is publicly available at \url{https://github.com/zhaoshishuang/3D-SCPR-Generation}.

By incorporating the vessel centerline as prior information, 3D SCPR images enable the algorithm to perceive the vessels in a straightened state, thereby simplifying the identification of stenosis. However, due to the inherent torsion of 3D centerlines, local planar grids tend to rotate inconsistently at different positions. This leads to rotational misalignment, where the radial orientation of points fails to align, causing anatomical structures to appear twisted or spiraled in the final 3D SCPR images. This is a systemic issue inherent to the design.
\subsection{Curved Feature Reconstruction} \label{method:cfr}
Previous works\cite{ma2021transformer,zreik2018recurrent,coronaryrcnn,candemir2020automated} usually overlook the presence of abnormal distortions in 3D SCPR images, which may adversely affect stenosis grading.
In this work, we incorporate the undistorted CCTA images into the network inputs to provide original texture around stenosis regions that may be lost in 3D SCPR images.

\begin{figure}[t]
    \centering
    \includegraphics[width=14cm]{method.pdf}
    \caption{The overall framework of our method. First, we obtain a CCTA patch and a 3D SCPR image as model inputs from the CCTA image and the input centerlines. Then, the CFR module is adopted to extract and fuse features from the inputs for stenosis grading. Finally, to incorporate clinical risk boundaries into neural network training, we apply a novel CR Loss for loss calculation.}
    \label{fig:model}
\end{figure}

Firstly, as the stenosis grade is determined by comparing the stenotic part with the normal part, we extend stenosis instance $\overline{\mathcal{S}} = \{ \mathbf{p}_i | \mathbf{p}_i \in \mathcal{S}\}_{i=s}^e$ along the vessel centerline to get $\hat{\mathcal{S}} = \{ \mathbf{p}_i | \mathbf{p}_i \in \mathcal{S}\}_{i=s-m}^{e+m}$, where $m$ is a hyper-parameter, to increase the vascular perceptual field.
Then the 3D SCPR image $\mathbf{I}_{\text{scpr}} \in \mathbb{R}^{H_\text{s} \times W_\text{s} \times D_\text{s}}$ is obtained given the centerline points $\hat{\mathcal{S}}$ and the original CCTA image $\mathbf{I}$.
% where $D_\text{s}$ is the length of $\hat{\mathcal{S}}$.
Besides, we crop a patch $\mathbf{I}_{\text{ccta}} \in \mathbb{R}^{H_\text{c} \times W_\text{c} \times D_\text{c}}$ from CCTA image $\mathbf{I}$, which fully contains the vascular regions of $\hat{\mathcal{S}}$, to preserve more texture details.
Finally, we feed $\mathbf{I}_{\text{scpr}}$ and $\mathbf{I}_{\text{ccta}}$ into the network to extract features separately and fuse them together to predict the stenosis grade.

\noindent \textbf{Feature Extraction and Fusion.}
We utilize two separate 3D ResUNet\cite{yu2017volumetric} as backbones to extract 3D SCPR features $\mathbf{F}_{\text{scpr}}\in \mathbb{R}^{H_\text{s} \times W_\text{s} \times D_\text{s} \times C}$ and CCTA features $\mathbf{F}_{\text{ccta}}\in \mathbb{R}^{H_\text{c} \times W_\text{c} \times D_\text{c}\times C}$, where $C$ is the channel size.


As $\mathbf{F}_{\text{ccta}}$ has the same spatial shape as $\mathbf{I}_{\text{ccta}}$, the CCTA stenosis instance features can be directly acquired by indexing $\mathbf{F}_{\text{ccta}}$ at the stenosis instance coordinates $\mathbf{p}_i \in \overline{\mathcal{S}}$.
We then use an MLP to project the instance features into new space and obtain $\overline{\mathbf{F}}_{\text{ccta}} \in \mathbb{R}^{(e-s+1)\times C}$.%, where $K$ denotes the length of stenosis instance $\overline{\mathcal{S}}$.
\begin{equation}
    \overline{\mathbf{F}}_{\text{ccta}} = \text{MLP}\left(\bigg[\mathbf{F}_{\text{ccta}}[\mathbf{p}_s], ..., \mathbf{F}_{\text{ccta}}[\mathbf{p}_e] \bigg]\right)
\end{equation}

where $\mathbf{p}_i \in \overline{\mathcal{S}}, \mathbf{F}_{\text{ccta}}[\mathbf{p}_i]$ means the local feature of $\mathbf{F}_{\text{ccta}}$ at position $\mathbf{p}_i$.

To obtain the 3D SCPR stenosis instance features, we first apply a 2D average pooling on $\mathbf{F}_{\text{scpr}}$ along the dimensions of $H_\text{s}$ and $W_\text{s}$ to get $\mathbf{F}_\text{scpr}' \in \mathbb{R}^{D_\text{s} \times C}$. 
Subsequently, the stenosis instance features are indexed from $\mathbf{F}_\text{scpr}'$ using the position of the stenosis instance $\overline{\mathcal{S}}$ within the point set $\hat{\mathcal{S}}$.
An MLP is then employed to generate $\overline{\mathbf{F}}_{\text{scpr}} \in \mathbb{R}^{(e-s+1)\times C}$.

\begin{equation}
    \overline{\mathbf{F}}_{\text{scpr}} = \text{MLP}\left(\bigg[\mathbf{F}_\text{scpr}'[m], ..., \mathbf{F}_\text{scpr}'[m+e-s]\bigg]\right)
\end{equation}

We then apply the element-wise addition on $\overline{\mathbf{F}}_{\text{ccta}}$ and $\overline{\mathbf{F}}_{\text{scpr}}$ to get the texture-enhanced stenosis instance features $\mathbf{F} = \overline{\mathbf{F}}_{\text{ccta}} + \overline{\mathbf{F}}_{\text{scpr}}$.

\noindent \textbf{Grade Prediction.}
Transformer\cite{vaswani2017attention} has been proven to perform well in many computer vision tasks\cite{dosovitskiy2020image,liu2021swin}.
Here we also apply an attention-based network for stenosis grade prediction.
Following \cite{dosovitskiy2020image}, we first add a learnable grade token $\mathbf{f}_\text{grade}$ to the stenosis instance features $\mathbf{F} = [\mathbf{f}^1, ..., \mathbf{f}^{e - s + 1}]$, where $\mathbf{f}^i \in \mathbb{R}^{C}$ is the $i$-th token of $\mathbf{F}$, to form a sequence of tokens $[\mathbf{f}_\text{grade}, \mathbf{f}^1, ..., \mathbf{f}^{e - s + 1}]$.
Learnable position embeddings $\mathbf{E}_{pos} \in \mathbb{R}^{(e - s + 2) \times C}$ are then added to the tokens, and the resulting sequence serves as the input to the Transformer encoder, which contains multiple layers of multi-head self-attention (MSA) and MLP blocks.
\begin{equation}
\begin{aligned}
    \mathbf{g}_0 &= [\mathbf{f}_\text{grade}, \mathbf{f}^1, ..., \mathbf{f}^{e - s + 1}] + \mathbf{E}_{pos}\\
    \mathbf{g}_l' &= \text{MSA}(\text{LN}(\mathbf{g}_{l-1})) + \mathbf{g}_{l-1},\ l = 1\ ...\ L\\
    \mathbf{g}_l &= \text{MLP}(\text{LN}(\mathbf{g}_{l}')) + \mathbf{g}_{l}',\ l = 1\ ...\ L\\
    \mathbf{g} &= \text{LN}(\mathbf{g}_L^0)
\end{aligned}
\end{equation}
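where $L$ is the number of layers in the encoder, LN denotes layer normalization, and $\mathbf{g}_L^0$ is the first token of $\mathbf{g}_L$, i.e., the output corresponding to the grade token.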
% $\mathbf{g}_L^0$ is the first token of $\mathbf{g}_L$.
The resulting feature $\mathbf{g} \in \mathbb{R}^{C}$ is then fed into an MLP 
% followed by a softmax layer 
to predict the final grade probability $\hat{P}$.
\begin{equation}
    \hat{P} = \text{Softmax}(\text{MLP}(\mathbf{g})) \in \mathbb{R}^{5}
\end{equation}


\subsection{Clinical Risk-Aware Loss} \label{method:crae}
Clinically, patients with different stenosis grades have varying clinical risks, requiring various treatment methods\cite{cury2022cad}.
% Hence, for the stenosis grading task, a good metric should consider the clinical risks associated with stenosis grades, and be able to assign a large penalty when the risk of the prediction differs significantly from the risk of the ground truth.
Therefore, an effective stenosis grading metric must be clinically risk-aware, applying a large penalty when predicted risk significantly deviates from the ground truth risk.


\noindent \textbf{CR Distance.}
Following the above intuition, we propose a novel Clinical Risk-Aware Distance (CR Distance) as the distance metric to measure the dissimilarity between prediction and ground truth. 
CR Distance takes into account the clinical risks associated with stenosis grades, which can be formulated as follows:
\begin{align}
\text{CRD}(a, b) =
\left\{
             \begin{array}{lr}
             (\left|a-b \right| + 1)^2, & \ a\leq 2 <b\  \ \text{or} \ b \leq 2 < a  \\
             \left| a-b \right| ,  \ \ &   \text{else}     \\
             \end{array}
\right. 
\end{align}
where a clinical risk boundary is between $1/2$ and $3/4/5$. 
When both inputs are on the same side of the boundary, the CR Distance is equal to the absolute error, as this type of mistake is less significant.
However, when the inputs are on opposite sides of the boundary, the CR Distance takes the form of a squared error, with a penalty term of 1 added to the absolute difference before squaring.
As a result, the distance between grades on opposite sides of the boundary measured by CR Distance is much larger, which is compatible with the clinical characteristic of stenosis grade.

\noindent \textbf{CR Loss.}
Stenosis grading is an ordinal regression problem and can be handled naturally by K-rank ordinal regression.
However, it is hard to leverage the clinical risk boundary for K-rank ordinal regression as it actually performs binary classification during training.
In our work, we approach stenosis grading as a classification task with a soft ground truth target\cite{diaz2019soft} $P = [P_1, ..., P_5]$:
\begin{align}\label{eq6}
P_{i}=\frac{\exp({-\phi\left(gt, i\right)})}{\sum_{j=1}^{5} \exp({-\phi\left(gt, j\right)})},
\end{align}
where $\phi(gt,i)$ is a metric function that measures the distance between the ground truth label $gt$ and grade $i \in \{1,2,3,4,5\}$. 

In this work, we propose to use the CR Distance as the metric function $\phi$ to introduce the clinical characteristics of stenosis into model training. 
As a result, the soft ground truth target can better reflect the clinically unacceptable confusion between stenosis grades 1/2 and 3/4/5. 
Our CR Loss is calculated by:
\begin{align}
\text{CR Loss} = -w_{gt}\sum_{i=1}^5 P_i \log(\hat{P}_i) = -w_{gt}\sum_{i=1}^5 \frac{\exp({-\text{CRD}\left(gt, i\right)})}{\sum_{j=1}^{5} \exp({-\text{CRD}\left(gt, j\right)})} \log(\hat{P}_i)
\end{align}
where $w_{gt}\in \mathbf{w}$ is the loss weight for the ground truth grade $gt$ and $\mathbf{w}=[w_1, w_2, w_3, w_4, w_5]$ is the vector of loss weights for all grades, which is introduced because the number of instances in each grade level is imbalanced.
We also compare CR Distance with other metric functions (like absolute distance (AD) and square distance (SD)) in Table~\ref{tab:ablationloss} to demonstrate its superiority.

\section{Experiment}

\subsection{Experimental Settings}
\subsubsection{Dataset.}
We collect a private dataset comprised of 500 CCTA scans, which contains labels of centerline points of coronary arteries and stenosis instances with corresponding grades annotated by at least three experienced radiologists. The dataset is collected in compliance with the terms of the licensing agreement and ethical certification. %The dataset consists of 3914 stenosis instances including 1408 calcified plaques, 891 non-calcified plaques, and 1614 mixed plaques. 
{The number of stenosis instances in each grade level from 1 to 5 is 1654, 1443, 511, 266, and 40 respectively.} %The statistics of stenosis grades are listed in \tableref{tab:num}. 
We randomly split the dataset into training, validation, and test sets with a ratio of 3:1:1.


% \subsubsection{Evaluation metrics.}
\noindent \textbf{Evaluation metrics.}
% \label{subsection_metric}
Stenosis grading is essentially an ordinal regression task, and traditional evaluation metrics such as Quadratic Weighted Kappa (\textbf{QWK})\cite{warrens2012some}, Mean Absolute Error (\textbf{MAE}), and Average Mean Absolute Error (\textbf{AMAE}) are naturally suited for it. However, these metrics do not take into account the clinical risk associated with each stenosis grade. The CR Distance, which incorporates the clinical risk boundary, is suitable for measuring the clinical risk relevance of stenosis grading results. Therefore, we introduce two new metrics based on CRD: Mean Clinical Risk-Aware Error (\textbf{MCRE}) and Average Mean Clinical Risk-Aware Error (\textbf{AMCRE}).
MCRE is defined as the average of CRD over all instances: $\text{MCRE} = \frac{1}{N} \sum_{i=1}^N \text{CRD}(pred_i, gt_i)$, and AMCRE is calculated as the average of the per-class MCRE over all classes: $\text{AMCRE} = \frac{1}{C} \sum_{i=1}^C \text{MCRE}_i$, where, in this context only, $N$ denotes the number of evaluated instances and $C$ the number of grade classes.


\noindent \textbf{Implementation details.}
The loss weights $\mathbf{w}$, extension length $m$, channel size $C$, number of Transformer layers $L$, and number of attention heads are set to $[1,1,2,3,4]$, 10, 64, 8, and 4, respectively, by default.
% The extend length $m=10$, channel size $C=64$, number of transformer layers $L=8$ and attention head number of 4 are set by default. 
The proposed model is trained for 150 epochs with 15 epochs warm up and cosine learning rate decay schedule. AdamW\cite{loshchilov2017decoupled} optimizer is adopted with learning rate of 0.001, weight decay of 0.001 and batch size of 32. 
All experiments are conducted on 8 NVIDIA GeForce RTX 3090 GPUs.

\subsection{Experiment Results}

%\noindent \textbf{Quantitative results.}
% We compare our method with other approaches\cite{coronaryrcnn,kelm2011detection} in \tableref{tab:sota}. 
In \tableref{tab:sota}, we compare our method with other approaches, including Random Forest Regression\cite{kelm2011detection}, Coronary R-CNN\cite{coronaryrcnn} and Cost-sensitive Classification\cite{csr}. Several baselines based on hard label multi-class classification, Continuous Label Regression and K-rank ordinal regression\cite{ordinal} are also provided for comparison. The results show that our approach outperforms all other methods by a large margin on almost all metrics. Particularly, our method exhibits superior performance in the metrics of MCRE and AMCRE, which are relevant to distinguishing between stenosis grades 1/2 and 3/4/5. This aligns with the design objective of the CR Loss, indicating that clinical risk awareness has been encoded into the network through the CR Loss.
% the performance of our method is more significant in the metrics of MCRE and AMCRE, indicating its superiority in distinguishing stenosis grades between 1/2 and 3/4/5, which aligns with the design purpose of CR loss. 

\tableref{tab:fenceng} provides a stratified performance analysis across individual grades, with the sample count for each grade reported in the bottom row. We specifically report MAE and MCRE, as AMAE/AMCRE are equivalent to MAE/MCRE in this context, and QWK is mathematically inapplicable. As observed, our method achieves state-of-the-art performance in most grades, with the most significant gains observed in grade 5. Since grade 5 represents the highest clinical risk, misclassifying these cases as grade 1/2 would lead to delayed intervention and severe consequences. This stratified superiority underscores the clinical reliability of our method.

\begin{table}[t]
\centering
\caption{Comparison with other methods.}
\label{tab:sota}
\resizebox{15cm}{!}{
\begin{tabular}{c|c|c|c|c|c|c}
\toprule[1.0pt]
Method & Data Type & QWK$\uparrow$ & MAE$\downarrow$ & AMAE$\downarrow$  & MCRE$\downarrow$  & AMCRE$\downarrow$  \\
\hline
Random Forest Regression & 3D SCPR&0.378&0.684&1.141&2.224&4.555\\
Coronary R-CNN &3D SCPR &0.497&0.603&0.829&1.969&2.791\\
Multi-class Classification&3D SCPR&0.611&0.574&0.744&1.761&2.156\\
Cost-sensitive Classification&3D SCPR &0.673&0.515&0.598&1.503&1.575\\
Continuous Label Regression&3D SCPR &0.583&0.597&0.733&1.901&2.117\\
K-rank Ordinal Regression&3D SCPR&0.672&\textbf{0.436}&0.569&1.585&1.425\\
% K-rank Ordinal Regression&CCTA+3D SCPR&0.692&0.486&0.538&1.307&1.576\\
\textbf{Ours}&CCTA+3D SCPR&\textbf{0.712}&0.480&\textbf{0.533}&\textbf{1.296}&\textbf{1.186}\\



\bottomrule[1.0pt]
\end{tabular}}
\end{table}

\begin{table}[t]
\centering
\caption{Stratified comparison with other methods.}
\label{tab:fenceng}
\resizebox{15cm}{!}{
% \begin{tabular}{c|c|c|c|c|c|c|c|c|c|c}
\begin{tabular}{c|ccccc|ccccc}
\toprule[1.0pt]
\multirow{2}{*}{Method} & \multicolumn{5}{c|}{MAE$\downarrow$} & \multicolumn{5}{c}{MCRE$\downarrow$}\\
\cline{2-11}
&1&2&3&4&5&1&2&3&4&5\\
\hline
Random Forest Regression &0.416&0.728&0.852&1.209&2.500&1.465&2.272&2.113&4.224&12.70\\
Coronary R-CNN &0.441&0.735&0.487&0.881&1.600&1.818&2.206&1.139&2.493&6.300\\
Multi-class Classification&0.419&0.699&0.513&0.791&1.300&1.524&2.125&1.217&2.015&3.900\\
Cost-sensitive Classification&0.352&0.662&0.487&0.687&0.800&1.225&1.967&1.165&\textbf{1.418}&2.100\\
Continuous Label Regression&0.432&0.689&0.591&0.955&1.000&1.600&2.125&1.470&3.090&2.300\\
K-rank Ordinal Regression&\textbf{0.127}&0.676&0.513&0.731&0.800&1.349&2.007&1.191&1.776&0.800\\
% K-rank Ordinal Regression&CCTA+3D SCPR&0.692&0.486&0.538&1.307&1.576\\
\textbf{Ours}&0.292&\textbf{0.654}&\textbf{0.478}&\textbf{0.642}&\textbf{0.600}&\textbf{0.798}&\textbf{1.923}&\textbf{1.130}&1.478&\textbf{0.600}\\
\bottomrule[1.0pt]
Sample count per grade&315&272&115&67&10&315&272&115&67&10\\

\bottomrule[1.0pt]
\end{tabular}
}
\end{table}



% \subsubsection{Qualitative Results}
% \noindent \textbf{Qualitative results.}


\subsection{Ablation Study}
\noindent \textbf{Curved Feature Reconstruction.}
In \tableref{tab:ablationcfr}, we ablate the effectiveness of different data utilization strategies in the CFR module, including CCTA combined with 3D SCPR, CCTA only, and 3D SCPR only. The results demonstrate that both 3D SCPR images and CCTA images are critical for accurate stenosis grading, and that combining them achieves the best performance.

Furthermore, we qualitatively validate this conclusion through visualization. Specifically, we randomly select 10 samples from the test set and use Grad-CAM~\cite{selvaraju2017grad} to generate activation heat maps of the last layer in the backbone network. These heat maps are visualized in \figureref{fig:vis}; to improve the visualization quality, we project and interpolate the generated 3D activation map onto a 2D SCPR image. Since our method utilizes two separate backbones, we average the activation maps from these two backbones to generate a single combined activation map.

Our visualization results indicate that neither 3D SCPR images nor CCTA images can accurately identify the stenosis region on their own. However, the combination of them can effectively learn and highlight the accurate stenosis region. Therefore, our CFR module can improve performance by leveraging the complementary information from both 3D SCPR and CCTA images.

\begin{figure}[ht!]
    \centering
    \includegraphics[width=15cm]{exp-vis-new.pdf}
    \caption{%Activation Heat Map of The Last Backbone Layer. 
    Visual comparison of results using different data utilization strategies. The text on the right displays the ground truth (GT) in red, while the predicted results (Pred) and the evaluation metrics, Absolute Error (AE) and Clinical Risk-Aware Error (CRE), are shown in black. QWK is omitted because it is not applicable to individual cases.}
    \label{fig:vis}
\end{figure}


\noindent \textbf{CR Loss.}
\label{exp:cr-loss}
To evaluate the impact of each component in our loss function, we conduct an ablation study on the metric function used in \equationref{eq6}. The results shown in \tableref{tab:ablationloss} indicate that the proposed CR distance performs better not only in MCRE and AMCRE, which are related to clinical risk, but also in traditional ordinal regression metrics.


\noindent \textbf{Stratified results.}
Similarly, \tableref{tab:ablationcfr_fenceng} and \tableref{tab:ablationloss_fenceng} report the stratified results across individual grades. It can be observed that our method outperforms the alternative data utilization strategies and metric functions on the majority of grades.

\begin{table*}
\begin{floatrow}
\capbtabbox{
\resizebox{1.0\columnwidth}{!}
{\begin{tabular}{cc|ccccccc}
\toprule[1.0pt]
\multicolumn{2}{c|}{Data Type} & \multirow{2}{*}{QWK$\uparrow$} & \multirow{2}{*}{MAE$\downarrow$} & \multirow{2}{*}{AMAE$\downarrow$}  & \multirow{2}{*}{MCRE$\downarrow$}  & \multirow{2}{*}{AMCRE$\downarrow$}  \\
\cline{1-2}
3D SCPR&CCTA&&&&&\\
\hline
\checkmark&&0.692&\textbf{0.473}&0.557&1.453&1.300\\
&\checkmark&0.694&0.506&0.568&1.335&1.215\\
\checkmark&\checkmark&\textbf{0.712}&0.480&\textbf{0.533}&\textbf{1.296}&\textbf{1.186}\\
\bottomrule[1.0pt]
\end{tabular}}
}{
 \caption{Ablation study on CFR module. }
 \label{tab:ablationcfr}
}
\capbtabbox{
\resizebox{1.0\columnwidth}{!}
{\begin{tabular}{ccc|ccccc}
\toprule[1.0pt]
\multicolumn{3}{c|}{Metric Function} & \multirow{2}{*}{QWK$\uparrow$} & \multirow{2}{*}{MAE$\downarrow$} & \multirow{2}{*}{AMAE$\downarrow$}  & \multirow{2}{*}{MCRE$\downarrow$}  & \multirow{2}{*}{AMCRE$\downarrow$}  \\
\cline{1-3}
AD&SD&CRD&&&&&\\
\hline
\checkmark&& &0.689&\textbf{0.475}&0.569&1.430&1.277\\
&\checkmark&& 0.689&0.511&0.564&1.445&1.249\\
% &&\checkmark&&0.654&0.477&0.693&\textbf{1.153}&1.941\\
&&\checkmark&\textbf{0.712}&0.480&\textbf{0.533}&\textbf{1.296}&\textbf{1.186}\\
\bottomrule[1.0pt]
\end{tabular}}
}{
 \caption{Ablation study on CR Loss.}
 \label{tab:ablationloss}
 \small
}
\end{floatrow}
\end{table*}


\begin{table}[t]
 \caption{Stratified results of the ablation study on CFR module. }
 \label{tab:ablationcfr_fenceng}

 \resizebox{15cm}{!}{
 \begin{tabular}{cc|ccccc|ccccc}
\toprule[1.0pt]
\multicolumn{2}{c|}{Data Type} & \multicolumn{5}{c|}{MAE$\downarrow$} & \multicolumn{5}{c}{MCRE$\downarrow$} \\
\hline
3D SCPR&CCTA&1&2&3&4&5&1&2&3&4&5\\
\hline
\checkmark&&\textbf{0.234}&0.684&0.496&0.672&0.700&1.052&2.040&1.200&1.507&0.700\\
&\checkmark&0.333&0.669&0.496&\textbf{0.642}&0.700&0.854&1.974&1.174&\textbf{1.373}&0.700\\
\checkmark&\checkmark&0.279&\textbf{0.654}&\textbf{0.478}&\textbf{0.642}&\textbf{0.600}&\textbf{0.838}&\textbf{1.923}&\textbf{1.130}&1.478&\textbf{0.600}\\
\bottomrule[1.0pt]
\end{tabular}
}
\end{table}



\begin{table}[H]
\resizebox{15cm}{!}{
\begin{tabular}{ccc|ccccc|ccccc}
\toprule[1.0pt]
\multicolumn{3}{c|}{Metric Function} & \multicolumn{5}{c|}{MAE$\downarrow$} & \multicolumn{5}{c}{MCRE$\downarrow$}  \\
\hline
AD&SD&CRD&1&2&3&4&5&1&2&3&4&5\\
\hline
\checkmark&& &\textbf{0.247}&0.684&0.487&0.627&0.800&1.018&2.070&1.139&1.358&0.800\\
&\checkmark&&0.333&0.691&0.513&\textbf{0.582}&0.700&1.053&2.092&1.191&\textbf{1.209}&0.700
\\
% &&\checkmark&&0.654&0.477&0.693&\textbf{1.153}&1.941\\
&&\checkmark&0.279&\textbf{0.654}&\textbf{0.478}&0.642&\textbf{0.600}&\textbf{0.838}&\textbf{1.923}&\textbf{1.130}&1.478&\textbf{0.600}\\
\bottomrule[1.0pt]
\end{tabular}
}
\caption{Stratified results of the ablation study on CR Loss.}
\label{tab:ablationloss_fenceng}
\small
\end{table}


\section{Conclusion}
In this paper, we propose a novel deep learning-based approach that, to the best of our knowledge, achieves multi-level stenosis grading for the first time. To address the challenge of predicting stenosis grade accurately in the presence of abnormal distortions in 3D SCPR images, we introduce a novel CFR module that allows for a comprehensive stenosis analysis from both 3D SCPR and CCTA images. Furthermore, we incorporate the clinical relevance of stenosis grading by introducing CR Loss, which encodes the clinical risk boundary into neural network training. Through experiments conducted on a private dataset, we demonstrate that our method significantly outperforms other approaches in both traditional and clinically relevant metrics. %Several ablation studies also have verified the effectiveness of our proposed CFR module and CR Loss.


\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{This work was supported by Peking University People’s Hospital Research and Development Fund.}


\bibliography{midl26_191}


% \appendix

% \section{Proof of Theorem 1}

% This is a boring technical proof of
% \begin{equation}\label{eq:example}
% \cos^2\theta + \sin^2\theta \equiv 1.
% \end{equation}

% \section{Proof of Theorem 2}

% This is a complete version of a proof sketched in the main text.

\end{document}
