\section{Proofs}
\label{app:Proofs}




\begin{definition}\textbf{(Absolute Spectral Gap \citep{jiang2018bernstein})} \label{def:ASG}
A $\pi$-invariant Markov operator $P$ has non-zero absolute spectral gap
$1 - \lambda(P)$ if
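\[
\lambda(P) = \sup \left\{ \|Ph\|_{\pi} : \|h\|_{\pi} = 1,\; h \in \mathcal{L}_2^0 \right\} < 1,
\]
where \(\mathcal{L}_2^0\) denotes the space of \(\pi\)-square-integrable functions with zero mean under \(\pi\).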
\end{definition}


\begin{lemma} \textbf{(Neyman--Pearson)} \label{lem: Neyman Pearson}
Let \(f\) and \(g\) be non-negative measurable functions, with \(g>0\) almost everywhere. Suppose there exists \(t\geq 0\) such that
\[
\int_{\{f/g \geq t\}} f = 1-\alpha .
\]
Then \(B_t=\{f/g \geq t\}\) is an optimiser of the problem
\[
\min_B \int_B g
\quad \text{subject to} \quad
\int_B f \geq 1-\alpha .
\]
\end{lemma}


\subsection{Proof of Theorem~\ref{thm: ACI guarantee}}
\label{app:adaptiv_proof}
The argument follows Appendix~A.7 of~\cite{ACI}, with the environment chain $A_t$ replaced by the score chain $B_i$.
By the triangle inequality and a union bound, we can write
\begin{align*}
\mathbb P\Bigg(\Bigg| \frac{1}{N}\sum_{i=1}^N \mathrm{err}_i - \alpha\Bigg| >\varepsilon\Bigg)
&\le
\mathbb P\Bigg(\Bigg| \frac{1}{N}\sum_{i=1}^N (\mathrm{err}_i - \mathbb E[\mathrm{err}_i\mid B_{n+i}])\Bigg|  > \frac{\varepsilon}{2}\Bigg) \\
&\quad + \mathbb P\Bigg(\Bigg| \frac{1}{N}\sum_{i=1}^N (\mathbb E[\mathrm{err}_i\mid B_{n+i}]-\alpha)\Bigg|  > \frac{\varepsilon}{2}\Bigg).
\end{align*}

We first bound the first term using a Hoeffding-type bound. The proof of Lemma~A.2 of \citet{ACI} uses only the fact that the errors are binary-valued and the monotonicity of $\alpha_i$ in the past errors, $\sum_{s=1}^{i-1}\mathrm{err}_s$. This argument is unchanged here, yielding
\[
\mathbb P\Bigg(\Bigg|\frac{1}{N}\sum_{i=1}^N (\mathrm{err}_i - \mathbb E[\mathrm{err}_i\mid B_{n+i}])\Bigg|  > \frac{\varepsilon}{2}\Bigg)
\le 2\exp(-N\varepsilon^2/8).
\]

Now we focus on bounding the second term. Define $f(b)=\mathbb E[\mathrm{err}_i\mid B_{n+i}=b]-\alpha$. Then $f$ takes values in $[-B,B]\subseteq[-1,1]$ for some constant $B$ (not to be confused with the chain $B_{n+i}$), is mean-zero under stationarity, and has variance proxy $\sigma_B^2$. Applying the Bernstein inequality for Markov chains (Theorem~A.1 of \citet{ACI}) yields
\[
\mathbb P\Bigg(\Bigg| \frac{1}{N}\sum_{i=1}^N (\mathbb E[\mathrm{err}_i\mid B_{n+i}]-\alpha)\Bigg|  > \frac{\varepsilon}{2}\Bigg)
\le 2\exp\Bigg(-\frac{N(1-\eta)\varepsilon^2}{8(1+\eta)\sigma_B^2+20B\varepsilon}\Bigg).
\]

Combining the two bounds proves the theorem.

\subsection{Proof of Theorem~\ref{thm:oracle max efficiency}}
\label{app:oracle max efficiency}

For \(t\geq 0\), define
\[
H(t) \coloneqq \{(x,y)\in\mathcal X\times\mathcal Y : p(y \mid x) \ge t\},
\]
and
\[
h(t) \coloneqq \mathbb{P}\big((X,Y)\in H(t)\big)
= \mathbb P\big(p(Y\mid X)\ge t\big).
\]
Then \(h\) is non-increasing, with \(h(0)=1\) and \(\lim_{t\to\infty} h(t)=0\).

By the no-flat-spots assumption, \(h\) is continuous. Hence, by the intermediate value theorem, for every \(\alpha\in(0,1)\) there exists a threshold \(t_\alpha\) such that \(h(t_\alpha)=1-\alpha\).

Set \(B_\alpha \coloneqq H(t_\alpha)\), and let \(f(x,y)\coloneqq p(x,y)\) and \(g(x,y)\coloneqq p_X(x)\). Then
\[
\frac{f(x,y)}{g(x,y)}
=
p(y\mid x),
\]
so
\[
B_\alpha
=
\left\{(x,y)\in\mathcal X\times\mathcal Y:
\frac{f(x,y)}{g(x,y)}\ge t_\alpha
\right\}.
\]
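Moreover, writing \(B_\alpha(x)\coloneqq\{y\in\mathcal Y:(x,y)\in B_\alpha\}\) for the \(x\)-section of \(B_\alpha\) and \(|\cdot|\) for Lebesgue measure,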
\[
\int_{B_\alpha}g(x,y)\,dx\,dy
=
\int_{\mathcal X}p_X(x)|B_\alpha(x)|\,dx
=
\mathbb E_X[|B_\alpha(X)|],
\]
and
\[
\int_{B_\alpha} f(x,y)\,dx\,dy
=
\mathbb P\big((X,Y)\in B_\alpha\big)
=
\mathbb P\big(Y\in B_\alpha(X)\big)
=
h(t_\alpha)
=
1-\alpha.
\]
Therefore, by Lemma~\ref{lem: Neyman Pearson}, \(B_\alpha\) is an optimiser of
the marginal coverage problem.

It remains to show that \(B_\alpha\) is a sublevel set of \(\hat S\). By
assumption, there exists a strictly decreasing function
\(\phi:[0,\infty)\to\mathbb R\) such that
\[
\hat S(X,Y)=\phi(p(Y\mid X))
\]
\(P_{X,Y}\)-almost surely. Since \(\phi\) is strictly decreasing,
\[
p(y\mid x)\ge t_\alpha
\iff 
\phi(p(y\mid x))\le \phi(t_\alpha).
\]
Thus, setting \(\lambda_\alpha \coloneqq \phi(t_\alpha)\), we have
\[
B_\alpha
=
\{(x,y):\hat S(x,y)\le \lambda_\alpha\}
\]
up to \(P_{X,Y}\)-null sets. Hence, for every \(\alpha\in(0,1)\), there exists a threshold \(\lambda_\alpha\) such that the conformal sublevel set \(C_{\lambda_\alpha}\) solves the marginal coverage objective. Therefore \(\hat S\in\mathcal S_1\).

\subsection{Proof of Theorem~\ref{thm:oracle with conditional}} \label{app:proof-oracle with conditional}

For each \(x \in \mathcal X\), define the conditional high-density region
\[
H_x(t) \coloneqq \{y : p(y \mid x) \ge t\},
\]
and let
\[
h_x(t) \coloneqq \int_{H_x(t)}p(y'\mid x)\,\mathrm{d}y'.
\]
Note that \(h_x\) is non-increasing, with \(h_x(0)=1\) and
\(\lim_{t\rightarrow\infty} h_x(t)=0\).

By the no-flat-spots assumption, \(h_x\) is continuous for
\(P_X\)-almost every \(x\). Hence, by the intermediate value theorem, for every \(\alpha\in(0,1)\) there exists
\(t_{\alpha,x}\) such that
\[
h_x(t_{\alpha,x}) = 1-\alpha
\]
for \(P_X\)-almost every \(x\).

Define the prediction set
\[
B_\alpha(x)\coloneqq H_x(t_{\alpha,x}).
\]
By construction,
\[
\mathbb P\big(Y\in B_\alpha(x)\mid X=x\big)
=
h_x(t_{\alpha,x})
=
1-\alpha
\]
for \(P_X\)-almost every \(x\). Moreover, as a conditional high-density region, \(B_\alpha(x)\) has minimal Lebesgue measure among all measurable sets with conditional probability at least \(1-\alpha\); this follows from Lemma~\ref{lem: Neyman Pearson} applied with \(f=p(\cdot\mid x)\) and \(g\equiv 1\). Therefore \(B_\alpha=\{(x,y):y\in B_\alpha(x)\}\) is a solution to optimisation problem~\ref{min:2} at level \(\alpha\).

It remains to show that \(B_\alpha\) is a sublevel set of \(\hat S\). By assumption, there exists a strictly increasing function
\(\phi:[0,1]\to\mathbb R\) such that
\[
\hat S(x,y)
=
\phi\bigl(h_x(p(y\mid x))\bigr)
\]
for \(P_{X,Y}\)-almost every \((x,y)\).

Now fix \((x,y)\in\mathcal X\times\mathcal Y\). We have
\begin{align*}
    y \in B_\alpha(x)
    &\iff 
    p(y \mid x) \ge t_{\alpha,x} \\
    &\iff 
    H_x\left(p(y \mid x)\right) \subseteq H_x(t_{\alpha, x})\\
    &\iff 
    h_x(p(y\mid x)) \leq
    h_x(t_{\alpha, x}) = 1-\alpha,
\end{align*}
where the second equivalence follows from the monotonicity of the level sets \(t\mapsto H_x(t)\) (for the converse direction, note that \(y\in H_x(p(y\mid x))\)), and the last equivalence holds for $P_{X,Y}$-almost every $(x,y)$.

Since \(\phi\) is strictly increasing,
\[
h_x(p(y\mid x))\leq 1-\alpha
\iff
\phi\bigl(h_x(p(y\mid x))\bigr)\leq \phi(1-\alpha).
\]
Thus, setting
\[
\lambda_\alpha \coloneqq \phi(1-\alpha),
\]
we have
\[
B_\alpha(x)
=
\{y:\hat S(x,y)\leq \lambda_\alpha\}
\]
for \(P_X\)-almost every \(x\), up to conditional null sets. Hence, for every \(\alpha\in(0,1)\), there exists a threshold \(\lambda_\alpha\) such that the conformal sublevel set \(C_{\lambda_\alpha}\) solves the conditional coverage objective. Therefore \(\hat S\in\mathcal S_2\).


