\clearpage

\appendix

\section{Algorithmic Description of SACP}
\label{app_algo}

In Algorithm~\ref{scpa_algo}, we describe the step-wise procedure and the computation required to apply SACP, which incorporates spatial context into 3D voxel-wise segmentation and enhances uncertainty quantification.
\begin{itemize}

\item In Step 1, a pretrained segmentation model $f_{\Theta}$ generates voxel-wise predictive probabilities in an input volume $\mathcal{X}$ using the softmax function.

\item In Step 2, we apply class-conditional calibration to ensure a desired confidence level of at least $1-\alpha$ for each class $\hat{y}\in \mathcal{Y}$ using the $S_{\text{base}}$ non-conformity scores of a calibration set of voxels. For each class, the $(1-\alpha)$-quantile threshold $\tau^{\hat{y}}_{\alpha}$ is determined, setting the baseline for prediction set construction.

\item In Step 3, we compute spatial properties as the Euclidean distances of each voxel $x \in \mathcal{X}$ to a set of critical masses $m\in \mathcal{M}$ denoted by $\delta_m$ and to the canonical object label $l \in \mathcal{Y}$ denoted by $\phi_l$.

\item In Step 4, we identify the nearest critical mass $v\in \mathcal{M}$ for each voxel, forming a spatial reference. Then, a normalized weight $w_v$ is computed for each voxel $x$ based on its proximity to the canonical object $l$ and the nearest critical mass $v$, adjusted by a mass-specific relevance factor $\gamma_v$. This weight modulates the base non-conformity score $S_{\text{base}}$ associated with the canonical object $l$ to refine uncertainty estimation relative to spatial critical structures. Finally, the prediction set $\mathcal{C}(x)$ is constructed by including the canonical object label $l$ in the set if and only if the adjusted score $S_{\text{SACP}}$ remains below its respective (class-conditional) quantile threshold $\tau^{l}_{\alpha}$.
\end{itemize}

By integrating spatial information, SACP improves the reliability of conformal prediction in 3D segmentation, particularly in anatomically structured regions where spatial coherence is essential.

\begin{algorithm2e}[t]
\caption{Spatially-Adaptive Conformal Prediction (SACP)}
\label{scpa_algo}
\DontPrintSemicolon
\LinesNumbered
% \SetAlgoNoEnd
\setstretch{1.2}
\KwIn{3D input volume $\mathcal{X}$: voxels $x$ with true labels $y$; Set of all possible labels $\hat{y} \in \mathcal{Y}$; set of critical masses $\mathcal{M}$; canonical object label $l$; pretrained segmentation model $f_{\Theta}$; desired error rate $\alpha$; mass-specific relevance factors $\{\gamma_m\}_{m \in \mathcal{M}}$;}
\KwOut{$\mathcal{C}(x)$ as prediction set for each voxel;}
\BlankLine
\tcp*[h]{\color{purple}{Step 1: Get model predictions}}\\
$\forall x \in \mathcal{X}, \hat{y} \in \mathcal{Y}:\ p(\hat{y}|x) \leftarrow \text{softmax}\big(f_{\Theta}(x,\hat{y})\big)$ \tcp*{Get predictive probabilities} 
\BlankLine
\tcp*[h]{\color{purple}{Step 2: Class-conditional calibration on $n$ voxels}} \\
\For{each class $\hat{y} \in \mathcal{Y}$}{
    $\tau^{\hat{y}}_{\alpha} \leftarrow \text{Quantile}_{1-\alpha}\Big(\big\{S_{\text{base}}(x_i,y_i):\ y_i=\hat{y}\big\}_{i=1}^n\Big)$\;
}
\BlankLine
\tcp*[h]{\color{purple}{Step 3: Compute spatial distances}} \\
\For{each voxel $x \in \mathcal{X}$}{
    $\forall m \in \mathcal{M}:\ \delta_m \leftarrow d_{Euc}(x, m)$ \tcp*{Distance to the critical masses (Eq.~\ref{mass_dist})}
    
    $V_l \leftarrow \{x' \in \mathcal{X} | \arg\max_{\{\hat{y}\in \mathcal{Y}\}} f_{\Theta}(x',\hat{y}) = l\}$ \tcp*{Set of canonical object voxels}
    
    $\phi_l \leftarrow \hat{d}_{Euc}(x,l)$ \tcp*{Distance to the canonical object (Eq.~\ref{label_dist} and~\ref{segmented_label})}
}
\BlankLine
\tcp*[h]{\color{purple}{Step 4: Generate SACP prediction sets}} \\
\For{each voxel $x \in \mathcal{X}$}{
    $v = {\arg\min}_{\{m\in \mathcal{M}\}}\ \delta_{m}$ \tcp*{Find the nearest critical mass}
    
    $w_{v}(x,l) \leftarrow \sigma\Big(\frac{1}{\gamma_{v}}\big(\phi_l + \delta_v\mathcal{I}(l)\big)\Big)$ \tcp*{Compute spatial weight (Eq.~\ref{weight_eq})}

    $S_{\text{SACP}}(x|\hat{y}=l) \leftarrow w_{v}(x,l)\cdot S_{\text{base}}(x|\hat{y}=l)$ \tcp*{Score for canonical object}

    $l \in \mathcal{C}(x) \Leftrightarrow S_{\text{SACP}}(x|\hat{y}=l) \leq \tau^{l}_{\alpha}$ \tcp*{Conservative inclusion of $\hat{y}=l$ (Eq.~\ref{sacp_nonconf})}

}
\Return{$\mathcal{C}(x)$ for all $x \in \mathcal{X}$}
\end{algorithm2e}

\section {Further Details of SACP Parameters}
\label{app_sacp_details}

We compute $\delta_m$ as the Euclidean distance from voxel $x$ to any of the potential critical masses $m \in \mathcal{M}$ (e.g., major vessels), which is defined by the function $d_{Euc}: \mathcal{X} \times \mathcal{M} \rightarrow \mathbb{R}^+$ as
\begin{equation}
\delta_m = d_{Euc}(x,m) = \underset{x'\in V_m}{\min} || x - x' || \ ,
\label{mass_dist}
\end{equation}
where $m \in \mathcal{M}$ is a critical mass containing a set of voxels $V_m \subset \mathcal{X}$. 

We compute $\phi_l$ as the Euclidean distance from voxel $x$ to the segmentation outcome of a pretrained model $f_{\Theta}$, which is defined by the function $\hat{d}_{Euc}: \mathcal{X} \times \mathcal{Y} \rightarrow \mathbb{R}^+$ as
\begin{equation}
\phi_l = \hat{d}_{Euc}(x,l) = \underset{x'\in V_l}{\min} || x - x' || \ ,
\label{label_dist}
\end{equation}
where $l \in \mathcal{Y}$ is the canonical object label and $V_l \subset \mathcal{X}$ contains a set of voxels that are segmented as label $l$ such that:
\begin{equation}
V_l=\big\{x' \in \mathcal{X}\ |\ \underset{\hat{y}\in \mathcal{Y}}{\arg\max}\ f_{\Theta}(x',\hat{y}) = l\big\}\ ,
\label{segmented_label}
\end{equation} 
in which $f_{\Theta}(x',\hat{y})$ is the outcome of the pretrained segmentation model associated with the label $\hat{y}$ when classifying the voxel $x'$.

We also compute the confidence of segmentation model defined as the predictive probability associated with the canonical object label $l$ (e.g., a tumor) and denoted by $p(\hat{y}=l|x)$ for each voxel $x$. 
High confidence associated with the canonical object label indicates that the model is making reliable predictions that a voxel belongs to that label, which can be valuable in improving reliability in high-risk tasks.
The weight function is formulated to represent lower values with higher probabilities (i.e., more confident predictions) and vice versa, emphasizing regions where the model is more confident while discounting less certain regions. To refine the weight computation, we use this segmentation confidence to calculate the surprisal function $\mathcal{I}(l)\defeq-\log p(\hat{y}=l|x)$. This surprisal quantifies the information content or unexpectedness of observing the canonical object $l$ with probability $p(\hat{y}=l|x)$ and accounts for the model's inherent uncertainty during segmentation.
By incorporating surprisal, the prediction sets dynamically adapt to the probabilistic confidence of the model.

Following class-conditional CP with the desired confidence level $1-\alpha$ and according to~\equationref{eq_class_q}, we independently compute the class-specific quantile $\tau^{\hat{y}}_{\alpha}$ associated with the canonical object label $\hat{y}=l \in \mathcal{Y}$, based on the $S_{\text{base}}$ scores of calibration data.
Then, we use $S_{\text{SACP}}$ during testing to include the canonical object label $l$ in the voxels' prediction sets as proposed in Corollary~\ref{label_corollary}.

\section{Proof of Theorem~\ref{conservative_theorem}}
\label{app_proof}

\begin{proof}
Following class-conditional CP, $\tau^{\hat{y}}_{\alpha}$ denotes the $(1-\alpha)$-quantile of $S_{\text{base}}$ scores associated with calibration data with label $\hat{y}$. Then, for each voxel $x$, the condition for including the canonical object label $\hat{y}=l$ in the prediction set $\mathcal{C}_{\text{base}}(x)$ generated by $S_{\text{base}}$ scores is:
\begin{equation}
S_{\text{base}}(x|\hat{y}=l) \leq \tau^{l}_{\alpha}\ .    
\label{base_inclusion_app}
\end{equation}
By the definition in~\equationref{sacp_nonconf}, $S_{\text{SACP}}$ is computed for each voxel $x$ and the canonical object label $\hat{y}=l$ using the normalized weight $w_v$ as,
\begin{equation}
S_{\text{SACP}}(x|\hat{y}=l) = w_v\cdot S_{\text{base}}(x|\hat{y}=l) \qquad s.t. \qquad w_v=\sigma(\tilde{w}_v)\ ,  
\label{sacp_eq_app}
\end{equation}
where $\tilde{w}_v \in \mathbb{R}^+$ is the raw weight value defined in~\equationref{weight_eq}, and $\sigma(\cdot)$ is the steep sigmoid function (with the gain factor $\beta$) defined as $\sigma(\tilde{w}_v)=\frac{1}{1+\exp(-\beta\tilde{w}_v)}$. For other labels $\hat{y} \neq l$, $S_{\text{base}}$ is used to include the labels in the sets. Since $\tilde{w}_v$ is positive and normalized to be less than $1$, we have $0.5 \leq w_v < 1$.
Then, it follows that:
\begin{equation}
\forall x\in \mathcal{X}: \quad S_{\text{SACP}}(x|\hat{y}=l) < S_{\text{base}}(x|\hat{y}=l)\ .
\label{sacp_compare_eq_app}
\end{equation}
Note that $\underset{\tilde{w}_v\to+\infty}{\lim} w_v = 1$, and consequently, $\underset{\tilde{w}_v\to+\infty}{\lim} S_{\text{SACP}} = S_{\text{base}}$.
According to~\equationref{base_inclusion_app,sacp_compare_eq_app}, the above inequality implies the following condition to include $l$ in the set:
\begin{equation}
    S_{\text{SACP}}(x|\hat{y}=l) = w_v\cdot S_{\text{base}}(x|\hat{y}=l) \leq \tau^{l}_{\alpha}\ .
\end{equation}
Therefore, any label $\hat{y}\neq l$ included in $\mathcal{C}_{\text{base}}(x)$ (i.e., $S_{\text{base}}(x|\hat{y}) \leq \tau^{\hat{y}}_{\alpha}$) is also included in the prediction set $\mathcal{C}_{\text{SACP}}(x)$ generated by SACP, and for the canonical object $\hat{y}=l$, $S_{\text{SACP}}(x|\hat{y}=l) < S_{\text{base}}(x|\hat{y}=l)$ holds. 
Formally, this means:
\begin{equation}
   \mathcal{C}_{\text{base}}(x) \subseteq \mathcal{C}_{\text{SACP}}(x)\ . 
\end{equation}
\end{proof}

\section{Conservativeness in Conformal Prediction}
\label{app_conservative}
Conformal prediction constructs set-valued predictions with a user-specified coverage guarantee, ensuring that the empirical coverage of the prediction sets is at least the nominal confidence level. Given a dataset $\mathcal{D}_n = \big\{(x_i, y_i)\big\}_{i=1}^{n}$ and a new test point $x_{n+1}$, CP produces a prediction set $\mathcal{C}_{\alpha}(x_{n+1})$ such that
\begin{equation}
    \mathbb{P}(y_{n+1} \in \mathcal{C}_{\alpha}(x_{n+1})) \geq 1 - \alpha\ .
    \label{lower_bound}
\end{equation}
This property, known as \emph{conservativeness}, guarantees that the probability of the true label being included in the prediction set is at least $1 - \alpha$, often making CP slightly over-conservative due to the discrete nature of rank-based p-values in finite samples.

Conservativeness leads to both lower and upper bounds on the empirical coverage. The lower bound is given directly by the validity guarantee, ensuring~\equationref{lower_bound}.
However, the actual coverage can be higher than $1 - \alpha$ because the quantile is taken over a finite set of ranks; provided the non-conformity scores are almost surely distinct, this excess is bounded, leading to an upper bound of the form
\begin{equation}
\mathbb{P}\big(y_{n+1} \in \mathcal{C}_{\alpha}(x_{n+1})\big) \leq 1 - \alpha + \frac{1}{n+1}\ .    
\end{equation}
This small excess coverage diminishes as $n$ grows, ensuring that CP becomes \emph{asymptotically exact}, meaning  
\begin{equation}
\lim_{n \to +\infty} \mathbb{P}\big(y_{n+1} \in \mathcal{C}_{\alpha}(x_{n+1})\big) = 1 - \alpha\ .    
\label{exact_cov}
\end{equation}
For class-conditional CP, $n$ refers to the number of calibration samples in each class.
We encounter stronger conservativeness for rare classes (e.g., tumor label) as classes with small $n$ suffer from higher over-coverage due to the larger impact of discrete rank-based p-values.
Due to asymptotic exactness, as $n\rightarrow+\infty$, the upper bound tightens, and class-conditional CP approaches exact coverage in~\equationref{exact_cov}.
Unlike standard CP, class-conditional CP does not enforce a single global coverage level but rather adapts to the structure of the data, ensuring per-class validity.

Thus, conservativeness guarantees \emph{validity} for all sample sizes while maintaining distribution-free coverage guarantees. Class-conditional CP maintains the fundamental conservativeness of standard CP but is more sensitive to class imbalances, making it particularly useful when fairness across classes is a concern.

\section{Additional Experimental Results for MSK Dataset}
\label{additional_results}

\noindent\textbf{Dataset Characteristics.}
We analyze 30 contrast-enhanced computed tomography (CT) scans from the Memorial Sloan Kettering (MSK) Medical Segmentation Decathlon Pancreas dataset~\cite{simpson2019large}, comprising portal venous phase CT scans from Memorial Sloan Kettering Cancer Center (New York, USA). Ground truth segmentations were established through expert abdominal radiologist annotations for pancreatic masses (including cysts and tumors), while surrounding anatomical structures were segmented using TotalSegmentator~\cite{wasserthal2023totalsegmentator}. These complementary segmentations were integrated using a hierarchical fusion approach that prioritizes radiologists' tumor delineations over automated organ segmentations. This dataset includes a heterogeneous mix of pancreatic masses including resectable PDAC, intraductal papillary mucinous neoplasms (IPMN), and pancreatic neuroendocrine tumors (PNET). This composition notably differs from both the typical clinical presentation of PDAC, where approximately 80--85\% of patients present with vessel involvement indicating borderline resectable, locally advanced, or metastatic disease, and from our primary dataset, which specifically captured the full range of PDAC presentations including locally advanced cases.

\vspace{1mm}\noindent\textbf{Coverage Analysis.}
Our framework maintains strong performance on the MSK dataset, achieving an overall coverage of $0.980$ (mean per-case: $0.985 \pm 0.007$ standard error of the mean (SEM)). The coverage significantly exceeds the target coverage of $0.95$ (Wilcoxon signed-rank test, $p=0.0009$).

\vspace{1mm}\noindent\textbf{Distance-Based Analysis.}
Table~\ref{tab:vessel_coverage_msk} presents vessel-specific coverage rates across different proximity zones. The coverage patterns reflect the resectable nature of the cases, with notably high coverage rates in regions farther from vessels. Near-vessel regions ($\leq 2$mm) show more variable coverage ($0.821$--$1.000$) when tumor-vessel contact is present.
\begin{table}[t]
\centering
\caption{Vessel-specific coverage rates at different proximity zones for the MSK dataset. The missing values (``-'') indicate no tumor voxels were predicted near the celiac trunk and hepatic artery at these distances, consistent with the MSK dataset's focus on resectable PDAC cases.}
\vspace{2mm}
\label{tab:vessel_coverage_msk}
\begin{tabular}{l|ccccc}
\hline
Vessel & $\leq$2mm & $\leq$5mm & $\leq$10mm & $\leq$20mm & $>$20mm \\
\hline
CeTr & - & - & - & 0.985 & 0.980 \\
HA & - & 0.956 & 0.753 & 0.906 & 0.991 \\
SMA & 1.000 & 0.995 & 0.997 & 0.991 & 0.975 \\
PV & 0.821 & 0.858 & 0.918 & 0.967 & 0.991 \\
SMV & 0.973 & 0.989 & 0.986 & 0.988 & 0.966 \\
\hline
\end{tabular}
\end{table}
The relative width ratio (RWR) analysis shows a consistent relationship between prediction set size and vessel proximity, though less pronounced than in the primary dataset. Mean RWR values range from $1.141 \pm 0.031$ SEM in near-vessel regions ($\leq 2$mm) to $1.655 \pm 0.005$ SEM beyond 20mm. This pattern of increasing width with distance from the vessels persists across all vessels.
\begin{figure}[t]
    \centering
    \includegraphics[width=0.5\linewidth]{figures/msk.pdf}
    \caption{Comparison of empirical coverage at different confidence levels between our method (SACP) and standard Class-Conditional CP (CCCP) on the MSK dataset.}
    \label{fig:msk}
\end{figure}
\begin{figure}[t]
    \begin{center}
        \begin{minipage}{0.45\textwidth}
            % \centering
            \includegraphics[height=4.5cm, width=7cm, keepaspectratio=false]{figures/100220.PNG}
            {\small HA contact point: Purple boundary's lateral expansion suggests possible arterial involvement requiring arterial resection planning, while orange CP misses this critical region.}
        \end{minipage}
        \hfill
        \begin{minipage}{0.45\textwidth}
            % \centering
            \includegraphics[height=4.5cm, width=7cm, keepaspectratio=false]{figures/100220_2.PNG}
            {\small SMA contact point: Spatially-adaptive expansion identifies possible arterial invasion, a distinction missed by uniform CCCP bounds.}
        \end{minipage}

        \vspace{1em}

        \begin{minipage}{0.45\textwidth}
            % \centering
            \includegraphics[height=4.5cm, width=7cm, keepaspectratio=false]{figures/100246.PNG}
            {\small SMV contact point: Purple boundary's circumferential expansion indicates potential venous involvement unlike CCCP's assessment.}
        \end{minipage}
        \hfill
        \begin{minipage}{0.45\textwidth}
            % \centering
            \includegraphics[height=4.5cm, width=7cm, keepaspectratio=false]{figures/100167.PNG}
            {\small Portal-SMV confluence: Focused purple expansion suggests confluence involvement requiring vascular reconstruction planning, which uniform CCCP bounds fail to detect.}
        \end{minipage}

        \vspace{1em}

        \caption{Anatomically-adaptive conformal prediction sets compared to standard CCCP for PDAC cases. Ground truth tumor boundaries ({\color{blue}blue}), model predictions ({\color{goldenpoppy}yellow}), and vessel regions ({\color{red}red}) are shown. Our prediction sets ({\color{purple}purple}) provide adaptive uncertainty bounds based on vessel proximity, unlike the uniform width of standard CCCP ({\color{orange}orange}), enabling more informed surgical planning in critical regions.}
        \label{fig:examples}
    \end{center}
\end{figure}
The results from this dataset complement our primary analysis while highlighting the importance of dataset composition in evaluating conformal prediction frameworks for PDAC segmentation. The predominantly resectable cases in the MSK dataset provide insights into framework performance in scenarios with limited vessel involvement, while underscoring the need for diverse datasets that capture the full spectrum of PDAC presentations for comprehensive validation.

\noindent\textbf{Comparison with Standard Class-Conditional CP.}
As shown in Figure~\ref{fig:msk}, our spatially-adaptive approach yields comparable overall coverage ($0.980$ vs.\ $0.979$) while demonstrating improved stability in anatomically critical regions. Near vessels ($\leq 2$mm), we achieve higher coverage ($0.959$ vs.\ $0.956$) with more efficient prediction sets (RWR $1.141 \pm 0.061$ SEM vs.\ $1.205 \pm 0.095$ SEM). The framework shows a more controlled variation in RWR across proximity zones, ranging from $1.141 \pm 0.061$ SEM at $\leq 2$mm to $1.655 \pm 0.009$ SEM beyond 20mm, demonstrating effective adaptation to anatomical context while maintaining strong coverage guarantees.

\section{Experimental Setup Details}
\label{app:ai_seg}
\subsection{PDAC Segmentation Model Implementation}
\label{app:ai_seg_pdac} 
The PDAC and organ segmentation model utilized a novel tripartite architecture consisting of a teacher, professor, and student model, implemented using 3D UNet cascade architectures. The teacher model was initially trained on 517 contrast-enhanced CT scans from the PREOPANC trials (Amsterdam UMC and Leiden UMC), LAPC registry (Dutch Pancreatic Cancer Group), and control patients who underwent CT prior to transcatheter aortic valve implantation~\cite{van2018preoperative,janssen2021total,stoop2022surgical}. Ground truth segmentations were established by three expert radiologists at the Amsterdam University Medical Centers who manually segmented PDAC tumors in 256 LAP-CTs from 120 patients with (borderline) resectable PDAC and 66 LAP-CTs from 66 LAPC patients using 3D Slicer (version 4.11.20210226~\cite{fedorov20123d}). Additional anatomical context was provided through automated segmentation of surrounding structures (pancreas, duodenum, spleen, kidneys, adrenal glands, liver, and gallbladder) using TotalSegmentator version 1.5.6~\cite{wasserthal2023totalsegmentator}. The professor model, trained on 106 CT scans, was designed to refine the teacher's pseudo-segmentations using an Underestimation Focuser correction matrix that prioritized correctly identified tumors and areas of underestimation. The final student model was trained on an expanded dataset of 1085 CTs from 903 patients, combining manually segmented data with professor-corrected pseudo-segmentations. The model weights are publicly available at \url{https://zenodo.org/records/14782552}.

\subsection{Vessel Segmentation Model Implementation}
\label{app:ai_seg_vessel}
The vessel segmentation model was implemented using a 3D nnUNet cascade architecture (low-resolution followed by full-resolution) trained on a dataset of 92 contrast-enhanced CT scans~\cite{isensee2021nnu}. The model was designed to segment nine vascular structures: aorta, celiac trunk, hepatic artery, splenic artery, superior mesenteric artery, inferior vena cava, portal vein, splenic vein, and superior mesenteric vein. Training data was sourced from the PREOPANC trials and control patients, comprising CT scans from patients with varying stages of pancreatic ductal adenocarcinoma (PDAC) and control subjects who underwent CT imaging for transcatheter aortic valve implantation~\cite{van2018preoperative}. Ground truth segmentations were established through manual annotation by seven trained observers at the Amsterdam University Medical Centers using 3D Slicer (version 4.11.20210226)~\cite{fedorov20123d}, with particular focus on the five vessels critical for PDAC resectability assessment: celiac trunk, hepatic artery, portal vein, and the superior mesenteric vessels. The model weights are publicly available at \url{https://zenodo.org/records/14782552}.


\section{Additional Visualization Examples}
\label{visual_examples}
Figure~\ref{fig:examples} shows additional examples of our spatially-adaptive conformal prediction method across different PDAC cases taken from the PANORAMA dataset, demonstrating how the prediction sets adapt to varying tumor-vessel relationships. The visualization boundaries are obtained by creating a binary segmentation mask where voxels are assigned a value of 1 if the tumor label is included in their prediction set $\mathcal{C}(x)$, and 0 otherwise. The boundary of this binary mask defines our prediction set visualization (shown in purple), while standard CCCP bounds are shown in orange for comparison.

The examples illustrate various clinically relevant scenarios of tumor-vessel interfaces. Near the hepatic artery contact point, the spatially-adaptive boundary expands laterally to indicate possible arterial involvement. At the superior mesenteric artery interface, our method identifies potential arterial invasion through targeted expansion. The superior mesenteric vein contact region shows circumferential expansion suggesting venous involvement, while at the portal-SMV confluence, focused expansion indicates potential involvement requiring vascular reconstruction consideration. These cases demonstrate how SACP provides anatomically-informed uncertainty bounds that adapt based on vessel proximity, offering more detailed information for surgical planning compared to uniform CCCP bounds.


\section{Segmentation Performance Analysis by Vessel Proximity}
\label{dice}
To further validate the rationale for spatially-adaptive uncertainty quantification, we analyzed segmentation performance as a function of distance from critical vascular structures across both the PANORAMA and MSK datasets. This analysis revealed consistent patterns of reduced segmentation accuracy near vessel interfaces, indicating that the phenomenon is inherent to the task rather than dataset-specific.
Tables~\ref{tab:dice_panorama} and~\ref{tab:dice_msk} present the Dice coefficient analysis for tumor segmentation stratified by proximity to vessels.
\begin{table}[t]
\centering
\caption{Dice coefficient analysis by vessel proximity for PDAC segmentation on the PANORAMA dataset.}
\label{tab:dice_panorama}
\begin{tabular}{lccc}
\toprule
\textbf{Region} & \textbf{Median Dice} & \textbf{Mean Dice} & \textbf{Std Dev} \\
\midrule
Overall & 0.8084 & 0.7495 & 0.1476 \\
Near vessels ($<$5mm) & 0.7539 & 0.6364 & 0.2674 \\
Far from vessels ($\geq$5mm) & 0.8161 & 0.7543 & 0.1544 \\
\bottomrule
\end{tabular}
\end{table}

\begin{table}[t]
\centering
\caption{Dice coefficient analysis by vessel proximity for pancreatic tumor segmentation on the MSK dataset.}
\label{tab:dice_msk}
\begin{tabular}{lccc}
\toprule
\textbf{Region} & \textbf{Median Dice} & \textbf{Mean Dice} & \textbf{Std Dev} \\
\midrule
Overall & 0.3437 & 0.3799 & 0.3078 \\
Near vessels ($<$5mm) & 0.0716 & 0.2392 & 0.2831 \\
Far from vessels ($\geq$5mm) & 0.3553 & 0.3884 & 0.3138 \\
\bottomrule
\end{tabular}
\end{table}
In the PANORAMA dataset, we observed a 16\% relative decrease in mean Dice scores for regions near vessels compared to regions farther from vessels (0.6364 vs.\ 0.7543), along with substantially higher variability in near-vessel performance. This pattern was even more pronounced in the MSK dataset, which showed a 38\% relative decrease in mean Dice scores near vessels (0.2392 vs.\ 0.3884), with the median score dropping from 0.3553 in distant regions to just 0.0716 near vessels.

The overall lower Dice scores in the MSK dataset compared to PANORAMA can be attributed to two factors: (1) the MSK dataset includes a more heterogeneous mix of pancreatic pathologies beyond PDAC, including various cystic neoplasms and neuroendocrine tumors that present different imaging characteristics; and (2) annotation protocols between datasets likely differed in how tumor boundaries were defined. Despite these differences in overall performance, the spatial pattern of substantially decreased accuracy near vessels persists across both datasets.

The consistency of this pattern across datasets with different characteristics underscores the fundamental challenge in accurately segmenting tumor-vessel interfaces—precisely the regions where clinical decision-making is most critical for treatment planning. These findings provide evidence for spatially-adaptive uncertainty quantification approaches like SACP, which can account for this predictable spatial heterogeneity by providing appropriately expanded prediction sets in anatomically critical regions.


\section{Computational Requirements}
\label{comp_req}
The computational analysis was conducted on a MacBook Air equipped with an Apple M2 chip and 8GB of RAM, without GPU acceleration. The computational workflow comprised two primary phases: calibration and testing. The pre-computation of probability maps and distance maps is performed separately, with these artifacts serving as input to our conformal prediction framework. The computation of the tumor and vessel distance maps takes approximately 10 and 56 seconds for all 30 scans, respectively, and is completed on the cropped scans. The calibration phase, which involves computing non-conformity scores and deriving quantile thresholds across anatomical labels, required approximately 39 seconds. The subsequent testing phase, which generates prediction sets and evaluates uncertainty quantification, took approximately 208 seconds.


\section{Effect of Vessel Relevancy Hyperparameter ($\gamma$) on SACP Performance}
\label{gamma_exp}

\begin{table}[t]
\centering
\caption{Impact of vessel relevancy hyperparameter ($\gamma$) on performance metrics}
\label{tab:gamma-analysis}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lcccccr}
\hline
\textbf{Configuration} & \textbf{Overall} & \textbf{Coverage} & \textbf{Coverage} & \textbf{RWR} & \textbf{RWR} & \textbf{RWR} \\
 & \textbf{Coverage} & \textbf{($\leq$2mm)} & \textbf{($>$20mm)} & \textbf{($\leq$2mm)} & \textbf{($>$20mm)} & \textbf{Ratio*} \\
\hline
Baseline (arterial: 0.6, venous: 0.8) & 0.989 & 0.984 & 0.988 & 2.739 & 2.525 & 1.085 \\
Min difference (art: 0.8, ven: 0.8) & 0.989 & 0.984 & 0.989 & 2.720 & 2.524 & 1.078 \\
Max difference (art: 0.5, ven: 1.0) & 0.989 & 0.979 & 0.989 & 2.697 & 2.516 & 1.072 \\
CeTr focus ($\gamma$=0.6) & 0.992 & 0.991 & 0.991 & 2.642 & 2.527 & 1.045 \\
HA focus ($\gamma$=0.6) & 0.992 & 0.988 & 0.992 & 2.657 & 2.523 & 1.053 \\
SMA focus ($\gamma$=0.6) & 0.991 & 0.986 & 0.990 & 2.669 & 2.528 & 1.056 \\
PV focus ($\gamma$=0.6) & 0.991 & 0.985 & 0.991 & 2.678 & 2.523 & 1.061 \\
SMV focus ($\gamma$=0.6) & 0.990 & 0.990 & 0.990 & 2.697 & 2.534 & 1.064 \\
\hline
\multicolumn{7}{l}{*$\text{RWR Ratio} = \rho(\leq 2\,\text{mm})/\rho(>20\,\text{mm})$} \\
\end{tabular}
}
\end{table}

\subsection{Analysis of Different Vessel Weight Configurations}

The selection of the vessel relevancy hyperparameter ($\gamma$) significantly impacts the behavior of the spatially-adaptive conformal prediction framework. As demonstrated in Table~\ref{tab:gamma-analysis}, we systematically evaluated various configurations to understand their effect on coverage guarantees and prediction set sizes on the PANORAMA dataset.

\subsection{Differential Weighting}

Our baseline configuration with lower $\gamma$ values for arterial vessels ($\gamma$=0.6) compared to venous vessels ($\gamma$=0.8) maintained strong coverage guarantees (0.989) while providing pronounced spatial adaptation (RWR Ratio: 1.085). This configuration aligns with clinical priorities where arterial involvement typically poses greater surgical challenges.

\subsection{Varying the Contrast Between Vessel Types}

Configurations with minimal contrast between arterial and venous weights (art: 0.8, ven: 0.8) and maximized contrast (art: 0.5, ven: 1.0) both demonstrated strong performance. The maximized contrast configuration showed a slight decrease in near-vessel coverage (0.979) compared to other configurations, suggesting a potential coverage-efficiency tradeoff when the contrast becomes too pronounced.

\subsection{Vessel-Specific Configurations}

Individual vessel-focused configurations revealed interesting patterns:
\begin{enumerate}
\item \textbf{Celiac Trunk (CeTr) focus}: Produced the highest near-vessel coverage (0.991) with relatively minimal spatial adaptation (RWR Ratio: 1.045)
\item \textbf{Hepatic Artery (HA) focus}: Balanced coverage (0.988 near vessels) with moderate spatial adaptation (RWR Ratio: 1.053)
\item \textbf{Superior Mesenteric Artery (SMA) focus}: Similar to HA but with slightly more pronounced spatial adaptation
\item \textbf{Portal Vein (PV) focus}: Maintained good coverage with increased spatial adaptation (RWR Ratio: 1.061)
\item \textbf{Superior Mesenteric Vein (SMV) focus}: Demonstrated excellent near-vessel coverage (0.990) with strong spatial adaptation (RWR Ratio: 1.064)
\end{enumerate}

\subsection{Clinical Implications}
The choice of vessel importance weights should align with clinical guidelines and surgical priorities. For centers following NCCN guidelines (United States), where arterial involvement beyond $180^{\circ}$ renders a tumor unresectable, configurations with lower $\gamma$ values for arterial vessels (our baseline) may be preferable. For centers following European guidelines with different resectability criteria, alternative weightings may be more appropriate.

Our experiments confirm that the $\gamma$ parameter provides an effective mechanism for tuning the spatial awareness of the conformal prediction framework while maintaining strong coverage guarantees across all configurations.

\section{Comparison with Uniformly Conservative CCCP}
\label{ucccp}
To evaluate whether spatial awareness provides benefits beyond simply increasing overall conservativeness, we compared our SACP approach with a uniformly conservative CCCP (UC-CCCP) on the PANORAMA dataset. The uniformly conservative CCCP applies the minimum weight factor from our spatial approach (0.5) uniformly across all voxels, representing the most conservative setting possible under our weighting scheme.

\subsection{Experimental Setup}
We implemented the uniformly conservative CCCP by modifying the non-conformity score as,
\begin{equation}
S_{\text{UC-CCCP}}(x|\hat{y} = l) = 0.5 \cdot S_{\text{base}}(x|\hat{y} = l)\ .
\end{equation}
This reduces all non-conformity scores for the canonical object by 50\%, creating prediction sets that are uniformly more conservative regardless of vessel proximity. We evaluated this approach on the same 20 test cases using identical metrics as our primary experiments.

\subsection{Results}

Table~\ref{tab:uniform_comparison} shows the comparative results between standard CCCP, uniformly conservative CCCP (UC-CCCP), and our spatially-adaptive approach (SACP).

\begin{table}[t]
\centering
\caption{Coverage and prediction set width comparison across vessel proximity zones}
\label{tab:uniform_comparison}
\resizebox{\columnwidth}{!}{
\begin{tabular}{lcccccc}
\hline
\multirow{2}{*}{Distance} & \multicolumn{2}{c}{CCCP} & \multicolumn{2}{c}{UC-CCCP} & \multicolumn{2}{c}{SACP} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
& Coverage & Width & Coverage & Width & Coverage & Width \\
\hline
$\leq$2mm    & 0.954 $\pm$ 0.027 & 2.887 $\pm$ 0.320 & 1.000 $\pm$ 0.000 & 1.608 $\pm$ 0.160 & 0.981 $\pm$ 0.008 & 2.762 $\pm$ 0.150 \\
$\leq$5mm    & 0.970 $\pm$ 0.016 & 2.702 $\pm$ 0.262 & 1.000 $\pm$ 0.000 & 1.691 $\pm$ 0.175 & 0.987 $\pm$ 0.004 & 2.684 $\pm$ 0.131 \\
$\leq$10mm   & 0.977 $\pm$ 0.016 & 2.611 $\pm$ 0.263 & 1.000 $\pm$ 0.000 & 1.758 $\pm$ 0.187 & 0.988 $\pm$ 0.004 & 2.621 $\pm$ 0.122 \\
$\leq$20mm   & 0.978 $\pm$ 0.003 & 2.574 $\pm$ 0.205 & 1.000 $\pm$ 0.000 & 1.835 $\pm$ 0.191 & 0.987 $\pm$ 0.001 & 2.592 $\pm$ 0.090 \\
$>$20mm      & 0.982 $\pm$ 0.002 & 2.509 $\pm$ 0.078 & 1.000 $\pm$ 0.000 & 1.697 $\pm$ 0.020 & 0.988 $\pm$ 0.000 & 2.525 $\pm$ 0.036 \\
\hline
Overall      & 0.968 $\pm$ 0.038 & 2.657 $\pm$ 0.226 & 1.000 $\pm$ 0.000 & 1.718 $\pm$ 0.147 & 0.987 $\pm$ 0.004 & 2.637 $\pm$ 0.106 \\
\hline
\end{tabular}
}
\end{table}

The results reveal a critical limitation of the uniformly conservative approach: UC-CCCP achieves perfect coverage (1.000) across all regions, but this comes with a fundamental change in the prediction set structure. The perfect coverage indicates that, for every voxel, the prediction set includes all possible labels---effectively rendering the predictions meaningless from a clinical perspective.

\begin{figure}[t]
    \centering
    \begin{minipage}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{figures/CCCP.png}
        \vspace{0.5em}
        (a) CCCP
        \label{fig:cccp_standard}
    \end{minipage}
    \hfill
    \begin{minipage}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{figures/ucccp.png}
        \vspace{0.5em}
        (b) UC-CCCP
        \label{fig:cccp_uniform}
    \end{minipage}
    \hfill
    % Third image
    \begin{minipage}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{figures/SACP.png}
        \vspace{0.5em}
        (c) SACP
        \label{fig:sacp}
    \end{minipage}
    \caption{Comparison of prediction sets across different conformal prediction methods. Blue shows standard CCCP prediction sets, pink represents uniformly conservative CCCP sets, orange illustrates SACP prediction sets, and red represents the Superior Mesenteric Vein. The figure demonstrates how SACP provides more nuanced uncertainty quantification compared to standard and uniformly conservative CCCP approaches.}
    \label{fig:cp_comparison}
\end{figure}

The reported width values for UC-CCCP might appear smaller than those of CCCP and SACP, but this is misleading because the width metric measures average cardinality relative to the total number of classes. When a prediction set includes all possible labels (as happens with UC-CCCP), the prediction set width calculation is affected by how the metric is normalized. The actual prediction sets are maximally large, including all possible label options for every voxel, which renders them clinically unusable.

In contrast, SACP maintains high coverage (0.987) while producing prediction sets that spatially adapt to anatomical context. As shown in Figure~\ref{fig:cp_comparison}, SACP creates tighter prediction sets away from vessels and more conservative sets near vessels, aligning with clinical priorities for surgical planning. Additional image comparisons are available in our GitHub repository: \url{https://github.com/tailabTMU/SACP}.

These results demonstrate a fundamental advantage of SACP: it achieves the necessary balance between coverage guarantees and prediction set efficiency. Simply making CCCP uniformly more conservative leads to perfect but clinically useless prediction sets. SACP, on the other hand, focuses conservativeness where it matters most---near vessels, where accurate boundary delineation is crucial for surgical planning---while maintaining efficient prediction sets elsewhere.

\section{Analysis of Binary versus Continuous Weighting Schemes}
\label{binary}
The binary weighting approach represents a simplified baseline for incorporating anatomical context into conformal prediction. To ensure a fair comparison, we derived the binary weights from our continuous method, using 0.5 and 0.997, which are the minimum weights applied across all voxels and cases in the near ($\leq 5$mm) and far ($>5$mm) regions, respectively. This choice ensures that the binary method maintains at least the same level of conservativeness as our continuous approach in each region. The binary method divides the prediction space into two distinct regions based on proximity to critical structures:

\begin{equation}
w_{\text{binary}}(x) = \begin{cases}
    w_{\text{near}} & \text{if } \delta_v \leq d_{\text{threshold}} \\
    w_{\text{far}} & \text{if } \delta_v > d_{\text{threshold}}
\end{cases}
\end{equation}
where $\delta_v$ represents the distance to the nearest vessel $v$, $d_{\text{threshold}}$ is a fixed distance threshold (e.g., 5mm), and $w_{\text{near}}$, $w_{\text{far}}$ are predetermined weights for near and far regions, respectively.

To compare this baseline against our spatially-adaptive approach, we implemented the binary scheme using weights of 0.5 and 0.997 for the near ($\leq 5$mm) and far ($>5$mm) regions, respectively, derived from the empirical weight distribution of our original method. The binary approach achieved the following results:
\begin{itemize}
    \item Strong nominal coverage: Vessel-wise coverage reached 1.000 for regions within 5mm across all vessels
    \item Maintained coverage of 0.985--0.989 for regions beyond 10mm
    \item Prediction sets exhibited significantly larger volumes near vessels compared to our continuous approach
\end{itemize}
However, this apparently strong performance revealed several limitations:
\begin{enumerate}
    \item \textbf{Spatial Discontinuity}: The sharp transition at the 5mm boundary creates artificial discontinuities in prediction sets that do not reflect the gradual nature of anatomical relationships
    \item \textbf{Over-conservative Estimates}: The binary approach tends to include entire vessel-adjacent regions in prediction sets, leading to unnecessarily large prediction regions
    \item \textbf{Loss of Anatomical Context}: The simplified weighting scheme fails to capture the nuanced spatial relationships present in medical images
\end{enumerate}


\begin{figure}[t]
    \centering
    \subfigure[SACP (Our Method)]{
    \includegraphics[width=0.45\textwidth]{figures/SACP.png}
        \label{fig:sacp_method}
    }
    \hfill
    \subfigure[Binary Threshold Baseline]{
    \includegraphics[width=0.45\textwidth]{figures/binary.png}
        \label{fig:binary_baseline}
    }
    \caption{Visual comparison between our spatially-adaptive approach and the binary threshold baseline. Both images show a pancreatic tumor (yellow) adjacent to a critical vessel (red). While both methods achieve the target coverage rate, the binary approach (b) produces clearly unsuitable prediction sets. It effectively includes almost the entire vessel-adjacent region (light blue), failing to provide meaningful uncertainty bounds for surgical planning. In contrast, our SACP method (a) generates focused prediction regions (orange) that maintain anatomical relevance while ensuring coverage.}
    \label{fig:binary_comparison}
\end{figure}


The limitations of the binary approach become immediately apparent in Figure~\ref{fig:binary_comparison}. While achieving strong numerical coverage ($>0.98$ across all vessel-wise evaluations), the binary method produces prediction sets that are too broad to be clinically useful. As shown in Figure~\ref{fig:binary_baseline}, it effectively includes entire vessel-adjacent regions in its prediction sets. Additional image comparisons are available in our GitHub repository: \url{https://github.com/tailabTMU/SACP}.
In contrast, our continuous weighting approach offers several advantages:
\begin{itemize}
    \item Smooth transitions in prediction set boundaries
    \item Adaptive uncertainty estimation based on continuous distance measures
    \item Integration of multiple anatomical factors through $\phi_l$ and $\mathcal{I}(l)$
\end{itemize}

\section{Additional Analysis of Relative Width Ratio}
The Relative Width Ratio (RWR) provides a quantitative measure of how prediction set sizes adapt based on proximity to critical anatomical structures. Table~\ref{tab:vessel_rwr} presents a comprehensive analysis of both coverage and RWR values across different vessels and distance thresholds.
The RWR values demonstrate several key patterns:
\begin{enumerate}
    \item \textbf{Distance-dependent Adaptation:} Both CCCP and SACP show decreasing RWR values as distance from vessels increases, indicating more precise prediction sets in non-critical regions.
    \item \textbf{Vessel-specific Behavior:} Arterial vessels (HA, SMA) show higher RWR values in close proximity ($\leq$2mm) compared to venous vessels (PV, SMV), reflecting the clinical importance of arterial involvement in surgical planning.
    \item \textbf{SACP Improvements:} Our method generally maintains or reduces RWR values while achieving higher coverage, particularly in critical regions ($\leq$5mm from vessels).
    \item \textbf{Stability at Distance:} Beyond 20mm, RWR values stabilize around 2.4--2.6 for both methods, indicating consistent behavior in non-critical regions.
\end{enumerate}

\begin{table}[t]
\caption{Vessel-specific coverage rates and Relative Width Ratios (RWR) at different proximity zones}
\label{tab:vessel_rwr}
\resizebox{\columnwidth}{!}{
\begin{tabular}{l|cccc|cccc|cccc|cccc|cccc}
\hline
& \multicolumn{4}{c|}{2mm} & \multicolumn{4}{c|}{5mm} & \multicolumn{4}{c|}{10mm} & \multicolumn{4}{c|}{20mm} & \multicolumn{4}{c}{$>$20mm} \\
Vessel & \multicolumn{2}{c}{CCCP} & \multicolumn{2}{c|}{SACP} & \multicolumn{2}{c}{CCCP} & \multicolumn{2}{c|}{SACP} & \multicolumn{2}{c}{CCCP} & \multicolumn{2}{c|}{SACP} & \multicolumn{2}{c}{CCCP} & \multicolumn{2}{c|}{SACP} & \multicolumn{2}{c}{CCCP} & \multicolumn{2}{c}{SACP} \\
& Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR & Cov & RWR \\
\hline
CeTr & 0.999 & 2.539 & 1.000 & 2.569 & 0.999 & 2.570 & 1.000 & 2.563 & 0.998 & 2.650 & 1.000 & 2.630 & 0.980 & 2.720 & 0.987 & 2.722 & 0.980 & 2.455 & 0.988 & 2.471 \\
HA   & 0.959 & 3.369 & 0.980 & 3.275 & 0.973 & 3.136 & 0.986 & 3.153 & 0.987 & 3.007 & 0.994 & 3.040 & 0.981 & 2.826 & 0.989 & 2.849 & 0.980 & 2.443 & 0.987 & 2.454 \\
SMA  & 0.925 & 2.761 & 0.975 & 2.627 & 0.967 & 2.569 & 0.989 & 2.501 & 0.982 & 2.501 & 0.994 & 2.468 & 0.973 & 2.476 & 0.985 & 2.479 & 0.984 & 2.524 & 0.989 & 2.552 \\
PV   & 0.927 & 3.152 & 0.953 & 3.006 & 0.955 & 2.847 & 0.974 & 2.877 & 0.957 & 2.698 & 0.974 & 2.743 & 0.978 & 2.614 & 0.989 & 2.642 & 0.981 & 2.468 & 0.987 & 2.478 \\
SMV  & 0.960 & 2.615 & 0.997 & 2.331 & 0.956 & 2.387 & 0.987 & 2.327 & 0.958 & 2.201 & 0.980 & 2.222 & 0.975 & 2.236 & 0.987 & 2.270 & 0.983 & 2.655 & 0.987 & 2.672 \\
\hline
\end{tabular}
}
\end{table}




