\documentclass[twoside]{article}

\usepackage{aistats2025}
\input{math_commands}
\usepackage{url} 
\usepackage{graphicx} 
\usepackage{amsmath}
\usepackage{amsfonts}

\usepackage{adjustbox}
\usepackage{booktabs} 
\usepackage{algorithm, algpseudocode}
\usepackage{mathtools}
\usepackage{soul}
\usepackage{bbm}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{adjustbox}
\usepackage{booktabs}
\usepackage{ragged2e}
\usepackage{amsmath}
\usepackage{svg}

\usepackage{adjustbox}
\usepackage{booktabs}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{question}[theorem]{Question}
\newcommand{\std}[1]{^{\scriptstyle{\pm#1}}}

% If your paper is accepted, change the options for the package
% aistats2025 as follows:
%
%\usepackage[accepted]{aistats2025}
%
% This option will print headings for the title of your paper and
% headings for the authors names, plus a copyright note at the end of
% the first column of the first page.

% If you set papersize explicitly, activate the following three lines:
%\special{papersize = 8.5in, 11in}
%\setlength{\pdfpageheight}{11in}
%\setlength{\pdfpagewidth}{8.5in}

% If you use natbib package, activate the following three lines:
%\usepackage[round]{natbib}
%\renewcommand{\bibname}{References}
%\renewcommand{\bibsection}{\subsubsection*{\bibname}}

% If you use BibTeX in apalike style, activate the following line:
%\bibliographystyle{apalike}

\begin{document}

% If your paper is accepted and the title of your paper is very long,
% the style will print as headings an error message. Use the following
% command to supply a shorter title of your paper so that it can be
% used as headings.
%
%\runningtitle{I use this title instead because the last one was very long}

% If your paper is accepted and the number of authors is large, the
% style will print as headings an error message. Use the following
% command to supply a shorter version of the authors names so that
% they can be used as headings (for example, use only the surnames)
%
%\runningauthor{Surname 1, Surname 2, Surname 3, ...., Surname n}

% Supplementary material: To improve readability, you must use a single-column format for the supplementary material.
\onecolumn
\aistatstitle{Supplementary Materials}

\section{Background of Graph Autoencoder}

%\begin{equation}\label{eq: interval RRC}
%\begin{split}
%   C_{ab} =  \Big[ &\hat{W}^{\alpha/2}_{ab} - d^{\textrm{RR}} \big|\hat{W}^{1-\alpha/2}_{ab} - \hat{W}^{\alpha/2}_{ab} \big|,  \\
%   & \hat{W}^{1-\alpha/2}_{ab} + d^{\textrm{RR}}\big|\hat{W}^{1-\alpha/2}_{ab} - \hat{W}^{\alpha/2}_{ab}\big| \Big], \; 
% \end{split}
% \end{equation}
 
%\begin{equation}
%   (a, b) \in \displaystyle \mE^{test},
%\end{equation}

%for conformal prediction-based RR (CP-RR), and for conformal quantile regression-based RR (CQR-RR),

\subsection{Guaranteed Edge Weight Prediction Using GNNs}\label{sec: problem}
Let $\gG=(\sV,\mE)$ be a graph with node set $\sV$ and edge set $\mE \subseteq \sV \times \sV$.
Assume the graph has $n$ nodes, each with $f$ features.
Let $\mX \in \mathbb{R}^{n\times f}$ be the node feature matrix, and let $\mX_{i,:} \in \mathbb{R}^{f}$ be the feature vector of the $i$-th node.
The binary adjacency matrix of $\gG$, $ \mA \in \{0, 1\}^{n\times n}$, encodes the binary (unweighted) connecting structure of the graph. It is defined by:
\begin{equation} 
A_{ij} =
\begin{cases}
1, & \textrm{if } (i, j) \in \mE; \\
0, & \textrm{otherwise}.
\end{cases}
\end{equation}
Then, we define the weight matrix as $\mW \in \mathbb{R}_{\geq 0}^{n\times n}$, where $W_{ij}$ denotes the weight, rather than the binary indicator, of the edge connecting node $i$ to node $j$. In the context of a road system, for example, we can interpret $W_{ij}$ as the volume of traffic transitioning from junction $i$ to junction $j$.

We partition the edge set $\mE$ into four disjoint subsets, $\mE^{train}$, $\mE^{val}$, $\mE^{calib}$, and $\mE^{test}$, such that $\mE = \mE^{train} \cup \mE^{val} \cup \mE^{calib} \cup \mE^{test}$.
We assume that the weights of the edges in $\mE^{train}$ and $\mE^{val}$ are known.
The objective is to estimate the unknown weights of the edges in $\mE^{test}$.
Additionally, we assume that the entire graph structure, represented by the adjacency matrix $\mA$, is known.

%
To mask the validation and test sets, we define \begin{equation}
\mA^{train} \in \{0, 1\}^{n\times n}, \quad 
A^{train}_{ij} =
\begin{cases}
1, & \textrm{if } (i, j) \in \mE^{train}; \\
0, & \textrm{otherwise}.
\end{cases}
\end{equation}
$\mA^{val}$, $\mA^{calib}$ and $\mA^{test}$ are defined  in the same way based on $\mE^{val}$, $\mE^{calib}$ and $\mE^{test}$, respectively.

If $(i, j) \notin \mE^{train}$, it is possible to assign a small positive number to the corresponding element $W^{train}_{ij}$, such as an assigned value or the minimum of the existing edge weights. This can represent prior knowledge or assumptions about the unknown edge weight. In the following, we use a positive constant $\delta > 0$ to represent this minimal or assigned value. Incorporating this unknown edge weight information effectively leverages the underlying graph structure. The entries of the resulting weighted adjacency matrix $\mW^{train}$ are:
\begin{equation}\label{eq: weighted adj train}
W^{train}_{ij} =
\begin{cases}
W_{ij}, & \textrm{if } (i, j) \in \mE^{train}; \\
\delta, & \textrm{if } (i, j) \in \mE^{val} \cup \mE^{calib} \cup \mE^{test}; \\
0, & \textrm{otherwise}.
\end{cases}
\end{equation}

In the transductive setting (see Figure~\ref{fig: transductive}(a) in the appendix), the structure of the entire graph, represented by the adjacency matrix $\mA$, is known during the training, validation, and testing phases. To calibrate the prediction, we extract a subset from $\mE^{test}$ as a calibration edge set. This ensures that the calibration and test samples are exchangeable.

Consider edge weight prediction in traffic networks. The road system, $\mA$, and partial traffic volumes, $\mW^{train} + \mW^{val}$, are known. The task is to predict volumes, $\mW^{test}$, for the remaining roads.

During training, models observe node features and graph structure to learn functions for node classification/regression and embedding. At inference, models deduce edge connections between nodes (Figure~\ref{fig: transductive}).

Three GNN approaches are evaluated. The first is a Graph Autoencoder (GAE) \cite{kipf2016variational} that trains and infers on the full graph. The second is DiGAE \cite{kollias2022directed}, a directed GAE variant. The third is the line graph neural network (LGNN) \cite{cai2021line} that transforms edges to nodes in line graphs.

\section{Appendix Figures}
\subsection{Schematic figure for transductive and inductive settings for link prediction.}
\vspace{-5pt}
\begin{figure}[h]
\centerline{\includegraphics[height=3.5cm, width=0.7\textwidth,clip=]{figures/figure1.pdf}}
\small
\caption{Schematic figure for transductive and inductive settings for edge weight prediction.
Different colors indicate the availability of the nodes during the training or testing phases.
Solid and dashed lines represent edges used for training and the predicted edge in the testing phases, respectively.
(a) Transductive edge weight prediction performs both training and inference on the same graph.
(b) Inductive edge weight prediction inference is performed on a new, unseen graph.}
\label{fig: transductive}
\end{figure}

\vspace{-15pt}
\subsection{Learning the error structure of GNN prediction}
\vspace{-5pt}
\begin{figure}[h]
\centering
\includesvg[height=4.2cm]{figures/us_election_resultsOriFFF.svg}
\includesvg[height=4.2cm]{figures/us2016-ours.svg}
\caption{Residual for predicted 2016 U.S. county-level presidential election. (a) The baseline model's residual \cite{jia2020residual}, which is the normalized absolute difference between the predicted and ground truth vote count.  (b) RR-GNN's residual result. The results indicate that the proposed RR-GNN achieves smaller and more uniform error/residuals.}
\label{fig: comparison2}
\end{figure}

\subsection{Experiment: Residual between predicted and actual traffic volumes}
\vspace{-5pt}
\begin{figure}[H]
\centering
\includegraphics[height=3.4cm,width=0.244\textwidth]{figures/AnOri.png}
\includegraphics[height=3.4cm,width=0.244\textwidth]{figures/AnOursF.png}
\includegraphics[height=3.4cm,width=0.244\textwidth]{figures/ChiOri.png}
\includegraphics[height=3.4cm,width=0.244\textwidth]{figures/ChiOursF.png}
\caption{Residual between predicted and actual traffic volumes across roads in two cities under different models. We took the average volume at the start and end points as the road traffic. (a) Residual of predicted roads' traffic volume in Anaheim of the baseline model~\cite{huang2024uncertainty}, which is the absolute difference between the prediction and the ground truth. (b) Residual of predicted roads' traffic volume in Anaheim of RR-GAE. (c) Residual of predicted roads' traffic volume in Chicago of the baseline model. (d) Residual of predicted roads' traffic volume in Chicago of RR-GAE. For each city, the residuals from the two models were independently normalized to a 0--1 range for comparison purposes.}
\label{fig:comparisontraffic}
\end{figure}

\section{Algorithm for Node Regression}

\begin{algorithm}
\caption{Residual Reweighted Conformalized Graph Neural Network for Node Regression}
\label{alg: nodd regression}
\hspace*{\algorithmicindent} \textbf{Input:} The binary adjacency matrix $\mA \in \{0, 1\}^{n\times n}$, training node features $\mX\in \mathbb{R}^{n\times f}$, training node set $\sV^{train}$ and labels $\vy^{train}$, validation node set $\sV^{val}$ and labels $\vy^{val}$ (used for training the residual GNN), calibration nodes $\sV^{calib}$ and labels $\vy^{calib}$, test nodes $\sV^{test}$, user-specified error rate $\alpha \in (0,1)$, and two GNN models $g_{\vtheta_1}$ and $g_{\vtheta_2}$ with trainable parameters $\vtheta_1$ and $\vtheta_2$.\\
\begin{algorithmic}[H]
\State Train the models $g_{\vtheta_1}$ and $g_{\vtheta_2}$ with $\vy^{train}$ and $\vy^{val}$ according to Algorithm 2 in the main paper. %\ref{closs_train}.
\State Compute the nonconformity score, which quantifies how far each calibration node label falls outside the predicted quantile interval, scaled by the estimated residual $\big|\hat{R}_{i}\big|$:
\begin{equation}\label{eq: CQR score regression}
V^{\textrm{RR}}_{i} = \max\left \{ \frac{\hat{y}^{\alpha/2}_{i} - y^{calib}_{i}}{\big|\hat{R}_{i}\big|}, \frac{y^{calib}_{i} - \hat{y}^{1-\alpha/2}_{i}}{\big|\hat{R}_{i}\big|}  \right\}, \; i \in {\displaystyle \sV}^{calib},    
\end{equation}
\State Compute $d =$ the $k$th smallest value in $\{V^{\textrm{RR}}_{i}\}$, where $k=\lceil(|{\displaystyle \sV}^{calib}| +1)(1-\alpha)\rceil$;
\State Construct a prediction interval for test nodes:  
\begin{equation}
    C_{a} = \Big[\hat{y}^{\alpha/2}_{a} - d\big|\hat{R}_{a}\big|, \hat{y}^{1-\alpha/2}_{a} + d\big|\hat{R}_{a}\big| \Big], \; a \in {\displaystyle \sV}^{test}. \nonumber
\end{equation}
\end{algorithmic}
\hspace*{\algorithmicindent} \textbf{Output:} Prediction of confidence intervals for the test nodes $a \in {\displaystyle \sV}^{test}$ with the coverage guarantee:
\begin{equation}
    p\big(y^{test}_{a} \in C_{a} \big) \geq 1 - \alpha.
\end{equation}
\end{algorithm}

\section{Nonconformity Score and Evaluation Metrics}
{\bf Nonconformity Score}
The nonconformity score in the main paper is for conformal prediction-based RR (CP-RR).
For conformal quantile regression-based RR (CQR-RR), on the other hand,
the corresponding prediction interval is

\begin{equation}\label{eq: interval RRC}
\begin{split}
   C_{ab} =  \Big[ &\hat{W}^{\alpha/2}_{ab} - d^{\textrm{RR}} \big|\hat{W}^{1-\alpha/2}_{ab} - \hat{W}^{\alpha/2}_{ab} \big|,  \\
   & \hat{W}^{1-\alpha/2}_{ab} + d^{\textrm{RR}}\big|\hat{W}^{1-\alpha/2}_{ab} - \hat{W}^{\alpha/2}_{ab}\big| \Big], \; 
 \end{split}
\end{equation}
\begin{equation}
   (a, b) \in \mE^{test}.
\end{equation}

%\vspace{0.1in}
%\noindent
{\bf Evaluation Metrics:}
For evaluation, we use the marginal coverage, defined as
\begin{equation}\label{eq: cover}
    \textrm{cover} = \frac{1}{|\mE^{test}|} \sum_{(i,j)\in \mE^{test}} \mathbbm{1}\big(W^{test}_{ij} \in C_{ij}\big),
\end{equation}
where $C_{ij}$ is the prediction interval for edge $(i, j)$. The second metric is the inefficiency, defined as
\begin{equation}\label{eq: ineff}
    \textrm{ineff} = \frac{1}{|\mE^{test}|} \sum_{(i,j)\in \mE^{test}} |C_{ij}|,
\end{equation}
which measures the average length of the prediction interval.

In addition to the marginal coverage, we also consider the conditional coverage. Specifically, we measure the coverage over a slab of the feature space $S_{v, a, b}= \big\{[\mX_{i, :} \mathbin\Vert \mX_{j, :}]\in \mathbb{R}^{2f}: a \leq v^\top [\mX_{i, :} \mathbin\Vert \mX_{j, :}] \leq b \big\}$~\cite{romano2020classification, cauchois2020knowing}, where $[\mX_{i, :} \mathbin\Vert \mX_{j, :}]$ denotes the concatenated node features of the two endpoints of an edge $(i, j)$, and $v \in \mathbb{R}^{2f}$ and $a < b \in \mathbb{R}$ are chosen adversarially and independently from the data. For any prediction interval $f_\theta^*$ and $\delta \in (0, 1)$, the \emph{worst slice coverage} is defined as
\begin{equation}\label{eq: cond cover}
\begin{split}
    \textrm{WSC}(f_\theta^*, \delta) &= \inf\limits_{\substack{v \in \mathbb{R}^{2f}, \\ a < b \in \mathbb{R}}} \big \{ P \big( W^{test}_{ij} \in C_{ij} \mid [\mX_{i, :} \mathbin\Vert \mX_{j, :}] \in S_{v, a, b}  \big) \\
    & \phantom{----------} \textrm{s.t. } P([\mX_{i, :} \mathbin\Vert \mX_{j, :}] \in S_{v, a, b}) \geq \delta  \big \}.
\end{split}
\end{equation}

\section{Algorithm for Node Classification}

\begin{algorithm}[h]
\caption{Residual Reweighted Conformalized Graph Neural Network for Node Classification}
\label{alg: nodd classification}
\hspace*{\algorithmicindent} \textbf{Input:} The binary adjacency matrix $\mA \in \{0, 1\}^{n\times n}$, training node features $\mX\in \mathbb{R}^{n\times f}$, training nodes $\sV^{train}$ and class labels $\vl^{train}$ ($\vl^{train}$ is the one-hot encoding of the class label; the same holds for the class outputs $\hat{l}$ of the model), validation nodes $\sV^{val}$ and labels $\vl^{val}$ (used for training the residual GNN), calibration nodes $\sV^{calib}$ and labels $\vl^{calib}$, test nodes $\sV^{test}$, user-specified error rate $\alpha \in (0,1)$, and two GNN models $g_{\vtheta_1}$ and $g_{\vtheta_2}$ with trainable parameters $\vtheta_1$ and $\vtheta_2$.\\
\begin{algorithmic}[H]
\State Train the models $g_{\vtheta_1}$ and $g_{\vtheta_2}$ with $\vl^{train}$ and $\vl^{val}$ according to Algorithm 2 in the main paper. %\ref{closs_train}.
\State Compute the score which quantifies the residual of the calibration node classes $\displaystyle \vl^{calib}$ projected onto the nearest quantile produced by $g_{\vtheta_1}$ and $g_{\vtheta_2}$ :
\begin{equation}\label{eq: CQR score}
    V^{\textrm{RR}}_{i} = \max\left \{ \frac{\big|\hat{l}^{\alpha/2}_{i} - l^{calib}_{i}\big|}{\big|\hat{\mathbf{R}}_{i}\big| + \epsilon}, \frac{\big|l^{calib}_{i} - \hat{l}^{1-\alpha/2}_{i}\big|}{\big|\hat{\mathbf{R}}_{i}\big| + \epsilon}  \right\}, \; i \in \sV^{calib},
\end{equation}
\State Compute $d =$ the $k$th smallest value in $\{V^{\textrm{RR}}_{i}\}$, where
\begin{equation} \label{diffquantile}
    k=\operatorname{DiffQuantile}\big(\lceil(| \sV^{calib} | +1)(1-\alpha)\rceil\big);
\end{equation}
\State Construct a prediction interval for test nodes:  
\begin{equation}
    C_{a} = \Big[\hat{l}^{\alpha/2}_{a} - d_{(m)}\big|\hat{R}_{a}\big|, \hat{l}^{1-\alpha/2}_{a} + d_{(m)}\big|\hat{R}_{a}\big| \Big], \; a \in \sV^{test}. \nonumber
\end{equation}
\end{algorithmic}
\hspace*{\algorithmicindent} \textbf{Output:} Prediction of confidence intervals for the test nodes $a \in \sV^{test}$ with the coverage guarantee:
\begin{equation}
    p\big(l^{test}_{a} \in C_{a} \big) \geq 1 - \alpha.
\end{equation}
\end{algorithm}
%\vspace{250pt}
\section{Complete table of experiment}

\begin{table*}[h]
\caption{Performance comparison of the proposed models}
\label{tab:eff_all_models1gsmg}
\centering
\begin{adjustbox}{width=\textwidth}
\begin{tabular}{|l|c|c|c|c|c|c|c|c|}
\toprule
GNN Model on Anaheim Data& \multicolumn{2}{c|}{GraphConv} & \multicolumn{2}{c|}{SAGEConv}  & \multicolumn{2}{c|}{GCNConv} & \multicolumn{2}{c|}{GATConv} \\ \cmidrule{1-9}
Score Method-CP  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
GAE&$0.9156\std{0.0569} $ 
&${5.4093}\std{0.6783}$
&$0.9161\std{0.0617}$
&$6.2633\std{0.6041}$
&$0.9273\std{0.0556} $ 
&$6.3644\std{0.7133}$
&$0.9264\std{0.0702}$
&$6.4278\std{0.6963}$\\
DiGAE&$0.9163\std{0.0590} $ 
&${5.6076}\std{0.6497}$
&$0.9143\std{0.0662}$
&$6.3111\std{0.6624}$
&$0.9206\std{0.07034} $ 
&$6.4915\std{0.6903}$
&$0.9291\std{0.0539}$
&$6.3954\std{0.0416}$\\
LGNN&$0.9452\std{0.0287}$ 
&$6.9076\std{0.2908}$
&$0.9373\std{0.0360}$
&$6.4227\std{0.0455}$
&$0.9365\std{0.0388}$ 
&${6.3655}\std{0.5026}$
&$0.9391\std{0.0333}$
&$6.6962\std{0.3638}$\\ 
\midrule
Average & 0.9257 & 5.9748 & 0.9226 & 6.3324 & 0.9281 & 6.4071 & 0.9315 & 6.4398\\
\midrule
Score Method-CQR  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
GAE&$0.9548\std{0.0206}$ 
&${5.2680}\std{0.3499}$
&$0.9535\std{0.0285}$
&$5.8272\std{0.2352}$
&$0.9576\std{0.0419}$
&$4.2310\std{1.4752}$
&$0.9578\std{0.0346}$
&$4.1396\std{1.3386}$\\
DiGAE&$0.8984\std{0.0926} $ 
&${5.0580}\std{0.2792}$
&$0.8975\std{0.0982}$
&$5.6483\std{0.2399}$
&$0.9040\std{0.0873} $ 
&$5.7600\std{0.2960}$
&$0.9115\std{0.0691}$  
&$5.7889\std{0.2722}$\\
LGNN&$0.9010\std{0.0555}$ 
&${5.4381}\std{0.1453}$
&$0.9167\std{0.0480}$
&$5.9004\std{0.2302}$
&$0.9333\std{0.0430}$ 
&$6.1160\std{0.1818}$
&$0.9080\std{0.0607}$
&$6.0694\std{0.1861}$ \\ 
\midrule
Average & 0.9180 & 5.2547 & 0.9226 & 5.7920 & 0.9316 & 5.3690 & 0.9258 & 5.3326\\
\midrule  
Score Method-CQR-RR(Ours)  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
GAE&$0.9598\std{0.0292} $ 
&${5.1081}\std{0.0862}$
&$0.9559\std{0.0224}$
&$4.9831\std{0.0940}$
&$0.9595\std{0.0222} $ 
&$\textbf{3.8505}\std{0.1806}$
&$0.9553\std{0.0243}$
&$5.0743\std{0.2118}$\\
DiGAE&$0.9548\std{0.0341}$ 
&${4.9572}\std{0.1112}$
&$0.9448\std{0.0725}$
&$5.1398\std{0.1124}$
&$0.9589\std{0.0103}$ 
&$4.9422\std{0.1213}$
&$0.9564\std{0.0515}$
&$4.8513\std{0.1148}$\\
LGNN&$0.9538\std{0.0553}$
&${4.9094}\std{0.0816}$
&$0.9525\std{0.0438}$ 
&$4.8165\std{0.1626}$
&$0.9548\std{0.0273}$
&$4.0334\std{0.1014}$
&$0.9534\std{0.0241}$
&$4.9062\std{0.1004}$\\ 
\midrule
Average & \underline{\textbf{0.9560}} & \underline{\textbf{4.9912}} & \underline{\textbf{0.9508}} & \underline{\textbf{5.0193}} & \underline{\textbf{0.9575}} & \underline{\textbf{4.6423}} & \underline{\textbf{0.9553}} & \underline{\textbf{4.9739}}\\
\bottomrule
 &  &  &  &  &  &  & \\
\bottomrule
GNN Model On Chicago Data & \multicolumn{2}{c|}{GraphConv} & \multicolumn{2}{c|}{SAGEConv}  & \multicolumn{2}{c|}{GCNConv} & \multicolumn{2}{c|}{GATConv} \\ \cmidrule{1-9}
Score Method-CP  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
GAE&$0.7984\std{0.1181}$ 
&$3.6659\std{0.3313}$
&$0.8297\std{0.1264}$
&${3.6350}\std{0.2231}$
&$0.8234\std{0.1213}$ 
&$3.6918\std{0.2454}$
&$0.9524\std{0.0333}$
&$3.3493\std{0.5910}$\\
DiGAE&$0.8081\std{0.1257} $ 
&${3.5721}\std{0.1951}$
&$0.8196\std{0.1215}$
&$3.5978\std{0.1884}$
&$0.8135\std{0.1361} $ 
&$3.5846\std{0.2050}$
&$0.8135\std{0.1319}$
&$3.6346\std{0.2432}$\\
LGNN&$0.9174\std{0.0238}$ 
&$6.7157\std{0.1325}$
&$0.9152\std{0.0256}$
&$6.5865\std{0.1577}$
&$0.9151\std{0.0246}$ 
&$6.5265\std{0.1426}$
&$0.9075\std{0.0618}$
&${6.0679}\std{0.1862}$\\ 
\midrule
Average & 0.8477 & 4.6512 & 0.8548 & 4.5998 & 0.8507 & 4.6010 & 0.8912 & 4.3506 \\
\midrule 
Score Method-CQR  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
GAE&$0.9514\std{0.0144} $ 
&${3.3652}\std{0.1312}$
&$0.9517\std{0.0141}$
&$3.5878\std{0.2107}$
&$0.9578\std{0.0420}$
&$4.0504\std{1.2916}$
&$0.9524\std{0.0333}$
&$3.3292\std{0.5866}$\\
DiGAE&$0.9205\std{0.0498} $ 
&${3.3135}\std{0.1172}$
&$0.9223\std{0.0469}$
&$3.3872\std{0.1260}$
&$0.9250\std{0.0479} $ 
&$3.4241\std{0.1271}$
&$0.9089\std{0.0611}$
&$3.6158\std{0.2348}$\\
LGNN&$0.9284\std{0.0296}$ 
&${3.4362}\std{0.1029}$
&$0.9305\std{0.0258}$
&$3.4844\std{0.1233}$
&$0.9290\std{0.0284}$ 
&$3.6514\std{0.1050}$
&$0.9379\std{0.0261}$
&$4.0805\std{0.5445}$\\ 
\midrule  
Average & 0.9334 & 3.3716 & 0.9348 & 3.4865 & 0.9373 & 3.7086 & 0.9331 & 3.6752\\
\midrule
Score Method-CQR-RR(Ours)  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
GAE&$0.9576\std{0.0132} $ 
&${3.0951}\std{0.1013}$
&$0.9559\std{0.0189}$
&$3.0633\std{0.1178}$
&$0.9535\std{0.0123}$ 
&$3.1318\std{0.1022}$
&$0.9528\std{0.0145}$
&$3.0018\std{0.1021}$\\
DiGAE&$0.9521\std{0.0385} $ 
&${3.0112}\std{0.1263}$
&$0.9503\std{0.1002}$
&$\textbf{2.9232}\std{0.1019} $
&$0.9514\std{0.0424}$
&$3.0323\std{0.1122}$
&$0.9438\std{0.0724}$ 
&$3.0619\std{0.1226}$\\
LGNN&$0.9447\std{0.0344}$
&${3.2321}\std{0.0625}$
&$0.9478\std{0.0235}$ 
&$3.0143\std{0.0316}$
&$0.9517\std{0.0325}$
&$\textbf{2.9172}\std{0.0633}$
&$0.9514\std{0.0222}$
&$3.2961\std{0.0858}$\\
\midrule
Average & \underline{\textbf{0.9517}} & \underline{\textbf{3.2871}} & \underline{\textbf{0.9515}} & \underline{\textbf{3.1552}} & \underline{\textbf{0.9521}} & \underline{\textbf{3.1685}} & \underline{\textbf{0.9491}} & \underline{\textbf{3.2851}}\\
\bottomrule


\end{tabular}    
\end{adjustbox}
\vspace{0.0005in}
\justify
 The results are based on the conditional coverage \eqref{eq: cond cover} and inefficiency \eqref{eq: ineff} on the edge weight prediction task. The models were tested using several widely used graph convolutional layers, including GraphConv~\cite{morris2019weisfeiler}, SAGEConv~\cite{hamilton2017inductive}, GCNConv~\cite{kipf2016semi}, and GATConv~\cite{velivckovic2017graph}. The best conditional coverage and inefficiency for each graph convolutional layer are highlighted in bold. Across diverse datasets and graph convolutional layers, CQR-GAE and CQR-RR-GAE demonstrate strong performance in both inefficiency and conditional coverage, while CQR-DiGAE and CQR-RR-DiGAE excel in minimizing inefficiency.
\end{table*}






\section{Theoretical guarantee of our method}
\subsection{Conformal Prediction}

We assume we have access to the graph structure, $A$, the node features, $X$, and the weighted adjacency matrix $W^{\text{train}}$ defined in \eqref{eq: weighted adj train}. Let $(a, b)$ be the endpoints of a test edge. We aim to generate a prediction interval, $C_{a b}=f_\theta\left((a, b), A, X, W^{\text{train}}\right) \subset \mathbb{R}$, for the weight of such a test edge. The prediction interval should be marginally valid, i.e., it should obey
\begin{equation}
P\left(W_{a b} \in C_{a b}\right) \geq 1-\alpha,
\end{equation}
where $\alpha \in(0,1)$ is a user-defined error rate. The probability is over the data-generating distribution. For efficiency, we focus on the split CP approach~\cite{papadopoulos2002inductive}, using the training edge set $E^{\text{train}}$ for training and the calibration edge set $E^{\text{calib}}$ for calibration. $E^{\text{train}}$ is used to fit the prediction model, $f_\theta$, and a conformity score is calculated for each sample in $E^{\text{calib}}$. The conformity score evaluates how well the predictions match the observed labels.

\begin{proposition}
The prediction intervals generated by split CP (Algorithm 1), CQR (Algorithm 2), and RR are marginally valid, i.e., they obey equation (14) in the supplementary material.
\end{proposition}

Proof. First, we show that the calibration and test conformity scores defined in (4) and (11) in the supplementary material and (12) in the main paper are exchangeable. Given the entire graph structure, $A$, all the node features, $X$, and the edge weights of the training edges, $W^{\text{train}}$, the node embeddings are trained based on $W^{\text{train}}$, while the edge weights in the remaining $E^{\text{ct}}$ are set randomly. Hence, the division of $E^{\text{ct}}$ into $E^{\text{calib}}$ and $E^{\text{test}}$ has no impact on the training process. Consequently, the conformity scores for $E^{\text{calib}}$ and $E^{\text{test}}$ are exchangeable. In practice, we split $E^{\text{ct}}$ into $E^{\text{calib}}$ and $E^{\text{test}}$ randomly (as detailed in Section 1 in the supplementary material) by converting the graph into its line graph and then selecting nodes uniformly at random.

We also explore an alternative proof, which is equivalent to the proof in Huang et al.~\cite{huang2024uncertainty} but applied within a line graph setting. Consider the original graph $G=(V, E)$ and its corresponding line graph $G^{\prime}=\left(V^{\prime}, E^{\prime}\right)$, where $V^{\prime}=E$ and $E^{\prime}$ denotes adjacency between edges in $G$. After randomly dividing $E$ into $E^{\text{train}}$ and $E^{\text{ct}}$, and further splitting $E^{\text{ct}}$ into $E^{\text{calib}}$ and $E^{\text{test}}$, the edges of $G$ transform into nodes in $G^{\prime}$. This setup mirrors the node division in the line graph. We train node embeddings on $E^{\text{train}}$ using a graph autoencoder, which aligns with fixing the training node set in $G^{\prime}$. Given this fixed training set, any permutation and division of $E^{\text{ct}}$ (which corresponds to nodes in $G^{\prime}$) does not affect the training, and thus the conformity scores computed for $E^{\text{calib}}$ and $E^{\text{test}}$ are exchangeable.

Given this exchangeability of nonconformity scores, the validity of the prediction interval produced by CP and CQR follows from Theorem 2.2 of Lei et al.~\cite{lei2018distribution} and Theorem 1 of Romano, Patterson, and Candes~\cite{romano2019conformalized}. Let $V$ be the conformity score of CQR. The RR approach performs a monotone transformation of $V$, defined as
\[
\Phi_{i j}(V)=\frac{V}{\left|\hat{W}_{i j}^{1-\alpha / 2}-\hat{W}_{i j}^{\alpha / 2}\right|},
\]
where $i$ and $j$ are two nodes in the graph. For all $(i, j)$ and all $V$, $\Phi_{i j}^{\prime}(V)=\frac{\partial \Phi_{i j}(V)}{\partial V}>0$, i.e., the transformation is strictly monotonic in $V$. This implies $\Phi_{a b}$ is invertible for any test edge, $(a, b)$. Let $\Phi_{a b}^{-1}$ be the inverse of $\Phi_{a b}$. The Inverse Function Theorem implies $\Phi_{a b}^{-1}$ is also strictly increasing. Now suppose $d^{\mathrm{RR}}$ is the $k$th smallest value in $\left\{V_{i j}^{\mathrm{RR}}\right\}=\left\{\Phi_{i j}\left(V_{i j}\right)\right\}$, where $k=\left\lceil\left(\left|E^{\text{calib}}\right|+1\right)(1-\alpha)\right\rceil$. Then for a test edge $(a, b)$,
\[
P\left(\Phi_{a b}\left(V_{a b}\right) \leq d^{\mathrm{RR}}\right)=\frac{\left\lceil\left(\left|E^{\text{calib}}\right|+1\right)(1-\alpha)\right\rceil}{\left|E^{\text{calib}}\right|+1} \geq 1-\alpha .
\]

Using the monotonicity of $\Phi_{a b}^{-1}$,
\[
\begin{aligned}
1-\alpha & \leq P\left(\Phi_{a b}\left(V_{a b}\right) \leq d^{\mathrm{RR}}\right) \\
& =P\left(V_{a b} \leq \Phi_{a b}^{-1}\left(d^{\mathrm{RR}}\right)\right) \\
& =P\left(W_{a b} \in C_{a b}\right).
\end{aligned}
\]

The final equality follows from the construction of the prediction interval $C_a$ in Algorithm 1 in the main paper and Algorithms 1 and 2 in the supplementary material, together with the validity of CQR. This shows that the prediction intervals based on the reweighted conformity scores are valid. \qed



\begin{table*}[ht]
\caption{Results of Ours (RR-GNN) on Node Classification Datasets}
\label{tab:eff_all_models1}
\centering
\begin{adjustbox}{width=\textwidth}
\begin{tabular}{|l|c|c|c|c|c|c|c|c|}
\toprule
Dataset& \multicolumn{2}{c|}{GraphSAGE} & \multicolumn{2}{c|}{SGC}  & \multicolumn{2}{c|}{GCN} & \multicolumn{2}{c|}{GATS} \\ \cmidrule{1-9}
Dataset  & cover$^x$  & ineff & cover$^x$ & ineff & cover$^x$ & ineff& cover$^x$ & ineff\\\midrule
Cora: CF-GNN&$0.9456\std{0.0569} $ 
&${1.6284}\std{0.0483}$
&$0.9461\std{0.0603}$
&$1.6633\std{0.0441}$
&$0.9473\std{0.0556} $ 
&$1.6344\std{0.0418}$
&$0.9464\std{0.0702}$
&$1.6278\std{0.0334}$\\
Cora: RR-GAE&$\textbf{0.9478}\std{0.0523} $ 
&$\textbf{1.5896}\std{0.0354}$
&$\textbf{0.9490}\std{0.0643}$
&$\textbf{1.5907}\std{0.0432}$
&$\textbf{0.9465}\std{0.0759} $ 
&$\textbf{1.6175}\std{0.0354}$
&$\textbf{0.9508}\std{0.0554}$
&$\textbf{1.6114}\std{0.0287}$\\ \midrule
DBLP: CF-GNN&$0.9501\std{0.0523} $ 
&${1.5723}\std{0.0683}$
&$\textbf{0.9451}\std{0.0617}$
&$1.5274\std{0.0416}$
&$0.9473\std{0.0596} $ 
&$1.5644\std{0.0733}$
&$0.9467\std{0.0717}$
&$1.5729\std{0.0463}$\\
DBLP: RR-GAE&$\textbf{0.9518}\std{0.0509} $ 
&$\textbf{1.5467}\std{0.0427}$
&$0.9503\std{0.0428}$
&$\textbf{1.3563}\std{0.0626}$
&$\textbf{0.9484}\std{0.0624} $ 
&$\textbf{1.5371}\std{0.0248}$
&$\textbf{0.9505}\std{0.0469}$
&$\textbf{1.5570}\std{0.0356}$\\ \midrule
CiteSeer: CF-GNN&$0.9528\std{0.0203} $ 
&${1.1680}\std{0.0439}$
&$0.9525\std{0.0257}$
&$\textbf{1.1827}\std{0.0552}$
&$0.9496\std{0.0392}$
&$1.2310\std{0.0332}$
&$0.9508\std{0.0309}$
&$1.2396\std{0.0416}$\\
CiteSeer: RR-GAE&$\textbf{0.9556}\std{0.0918} $ 
&$\textbf{1.1539}\std{0.0615}$
&$\textbf{0.9598}\std{0.0561}$
&$1.1678\std{0.0372}$
&$\textbf{0.9526}\std{0.0363}$ 
&$\textbf{1.2016}\std{0.0289}$
&$\textbf{0.9562}\std{0.0428}$  
&$\textbf{1.1408}\std{0.0361}$\\ \midrule  
PubMed: CF-GNN&$0.9502\std{0.0207} $ 
&${1.4680}\std{0.0361}$
&$0.9508\std{0.0276}$
&$1.4272\std{0.0325}$
&$0.9516\std{0.0458}$
&$1.5310\std{0.0514}$
&$0.9512\std{0.0434}$
&$1.4396\std{0.0485}$\\
PubMed: RR-GAE&$\textbf{0.9526}\std{0.0483}$ 
&$\textbf{1.3275}\std{0.0392}$
&$\textbf{0.9520}\std{0.0482}$
&$\textbf{1.3897}\std{0.0339}$
&$\textbf{0.9521}\std{0.0473}$ 
&$\textbf{1.3732}\std{0.0296}$
&$\textbf{0.9515}\std{0.0419}$  
&$\textbf{1.3989}\std{0.0522}$\\ \midrule
Computers: CF-GNN&$0.9471\std{0.0276} $ 
&${3.3680}\std{0.3499}$
&$0.9492\std{0.0235}$
&$3.8272\std{0.0292}$
&$0.9457\std{0.0435}$
&$3.2310\std{0.0652}$
&$0.9478\std{0.0325}$
&$3.1396\std{0.0586}$\\
Computers: RR-GAE&$\textbf{0.9503}\std{0.0553} $ 
&$\textbf{2.7423}\std{0.0258}$
&$\textbf{0.9509}\std{0.0315}$
&$\underline{\textbf{2.6343}}\std{0.0413}$
&$\textbf{0.9418}\std{0.0436}$ 
&$\textbf{2.5471}\std{0.0365}$
&$\textbf{0.9354}\std{0.0584}$  
&$\underline{\textbf{2.7739}}\std{0.0272}$\\ \midrule
Photo: CF-GNN&$0.9511\std{0.0275} $ 
&${3.2680}\std{0.0395}$
&$0.9515\std{0.0263}$
&$2.2276\std{0.0354}$
&$0.9486\std{0.0419}$
&$2.2010\std{0.0387}$
&$0.9509\std{0.0391}$
&$2.1986\std{0.0286}$\\
Photo: RR-GAE&$\textbf{0.9554}\std{0.0723} $ 
&$\textbf{2.5474}\std{0.0456}$
&$\textbf{0.9534}\std{0.0913}$
&$\textbf{2.0026}\std{0.0316}$
&$\textbf{0.9504}\std{0.0342} $ 
&$\textbf{2.0003}\std{0.0370}$
&$\textbf{0.9498}\std{0.0512}$  
&$\textbf{1.7093}\std{0.0234}$\\ \midrule
CS: CF-GNN&$0.9438\std{0.0224} $ 
&${1.8669}\std{0.0347}$
&$0.9435\std{0.0284}$
&$1.6272\std{0.0452}$
&$0.9476\std{0.0416}$
&$3.6310\std{0.0325}$
&$0.9478\std{0.0317}$
&$2.7396\std{0.0286}$\\
CS: RR-GAE&$\textbf{0.9502}\std{0.0601} $ 
&$\textbf{1.8430}\std{0.0361}$
&$\textbf{0.9501}\std{0.0528}$
&$\textbf{1.6183}\std{0.0361}$
&$\textbf{0.9516}\std{0.0525} $ 
&$\underline{\textbf{2.5469}}\std{0.0227}$
&$\textbf{0.9485}\std{0.0329}$  
&$\textbf{2.3889}\std{0.0238}$\\ \midrule
Physics: CF-GNN&$0.9495\std{0.0243} $ 
&${1.2218}\std{0.0463}$
&$0.9507\std{0.0292}$
&$1.2430\std{0.0324}$
&$0.9489\std{0.0257} $ 
&$1.2005\std{0.0604}$
&$0.9505\std{0.0275}$
&$1.2243\std{0.0246}$\\
Physics: RR-GAE&$\textbf{0.9518}\std{0.0511}$ 
&$\textbf{1.2050}\std{0.0223}$
&$\textbf{0.9528}\std{0.0542}$
&$\textbf{1.2279}\std{0.0419}$
&$\textbf{0.9508}\std{0.0334}$ 
&$\textbf{1.1998}\std{0.0438}$
&$\textbf{0.9522}\std{0.0493}$
&$\textbf{1.2187}\std{0.0238}$\\ \bottomrule
\end{tabular}    
\end{adjustbox}    
\end{table*}




\begin{thebibliography}{}
\setlength{\itemindent}{-\leftmargin}
\makeatletter\renewcommand{\@biblabel}[1]{}\makeatother
\bibitem{romano2020classification} Romano, Yaniv and Sesia, Matteo and Cand{\`e}s, Emmanuel (2020).
    \newblock Classification with valid and adaptive coverage.
    \newblock \textit{Advances in Neural Information Processing Systems} \textbf{33}, 3581--3591.
    
\bibitem{cauchois2020knowing} Cauchois, Maxime and Gupta, Suyash and Duchi, John (2020).
    \newblock Knowing what you know: valid and validated confidence sets in multiclass and multilabel prediction.
    \newblock \textit{arXiv preprint arXiv:2004.10181}.

\bibitem{khan2022dvaegmm} Khan, Wasim and Haroon, Mohammad and Khan, Ahmad Neyaz and Hasan, Mohammad Kamrul and Khan, Asif and Mokhtar, Umi Asma and Islam, Shayla (2022).
    \newblock DVAEGMM: Dual variational autoencoder with Gaussian mixture model for anomaly detection on attributed networks.
    \newblock \textit{IEEE Access} \textbf{10}, 91160--91176.

\bibitem{kipf2016variational} Kipf, Thomas N and Welling, Max (2016).
    \newblock Variational graph auto-encoders.
    \newblock \textit{arXiv preprint arXiv:1611.07308}.
\bibitem{kollias2022directed} Kollias, Georgios and Kalantzis, Vasileios and Id{\'e}, Tsuyoshi and Lozano, Aur{\'e}lie and Abe, Naoki (2022).
    \newblock Directed graph auto-encoders.
    \newblock \textit{Proceedings of the AAAI Conference on Artificial Intelligence}.
\bibitem{morris2019weisfeiler} Morris, Christopher and Ritzert, Martin and Fey, Matthias and Hamilton, William L and Lenssen, Jan Eric and Rattan, Gaurav and Grohe, Martin (2019).
    \newblock Weisfeiler and Leman go neural: Higher-order graph neural networks.
    \newblock \textit{Proceedings of the AAAI Conference on Artificial Intelligence}.
\bibitem{cai2021line} Cai, Lei and Li, Jundong and Wang, Jie and Ji, Shuiwang (2021).
    \newblock Line graph neural networks for link prediction.
    \newblock \textit{IEEE Transactions on Pattern Analysis and Machine Intelligence} \textbf{44}, 5103--5113.
\bibitem{jia2020residual} Jia, Junteng and Benson, Austin R (2020).
    \newblock Residual correlation in graph neural network regression.
    \newblock \textit{Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
    588--598.

\bibitem{huang2024uncertainty} Huang, Kexin and Jin, Ying and Cand{\`e}s, Emmanuel and Leskovec, Jure (2024).
    \newblock Uncertainty quantification over graph with conformalized graph neural networks.
    \newblock \textit{Advances in Neural Information Processing Systems} \textbf{36}.

    
\bibitem{kipf2016semi} Kipf, Thomas N and Welling, Max (2016).
    \newblock Semi-supervised classification with graph convolutional networks.
    \newblock \textit{arXiv preprint arXiv:1609.02907}.

\bibitem{hamilton2017inductive} Hamilton, Will and Ying, Zhitao and Leskovec, Jure (2017).
    \newblock Inductive representation learning on large graphs.
    \newblock \textit{Advances in Neural Information Processing Systems} \textbf{30}.
\bibitem{velivckovic2017graph} Veli{\v{c}}kovi{\'c}, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Li{\`o}, Pietro and Bengio, Yoshua (2017).
    \newblock Graph attention networks.
    \newblock \textit{arXiv preprint arXiv:1710.10903}.
\bibitem{chernozhukov2010quantile} Chernozhukov, Victor and Fern{\'a}ndez-Val, Iv{\'a}n and Galichon, Alfred (2010).
    \newblock Quantile and probability curves without crossing.
    \newblock \textit{Econometrica} \textbf{78}, 1093--1125.

\bibitem{lei2018distribution} Lei, Jing and G'Sell, Max and Rinaldo, Alessandro and Tibshirani, Ryan J and Wasserman, Larry (2018).
    \newblock Distribution-free predictive inference for regression.
    \newblock \textit{Journal of the American Statistical Association}
    \textbf{113}(523), 1094--1111.

\bibitem{romano2019conformalized} Romano, Yaniv and Patterson, Evan and Cand{\`e}s, Emmanuel (2019).
    \newblock Conformalized quantile regression.
    \newblock \textit{Advances in Neural Information Processing Systems}
    \textbf{32}.

\bibitem{papadopoulos2002inductive} Papadopoulos, Harris and Proedrou, Kostas and Vovk, Volodya and Gammerman, Alex (2002).
    \newblock Inductive confidence machines for regression.
    \newblock \textit{13th European Conference on Machine Learning (ECML 2002), Helsinki, Finland, August 19--23, 2002, Proceedings},
    345--356.


\end{thebibliography}


\vfill

\end{document}
