\section{Proofs in Section~\ref{sec:label_prop}} \label{appendix:label prop}
We provide additional proof details from Section~\ref{sec:label_prop} below. 

% \subsection{Proof of Invertibility}\label{appendix:invertibility}

% % \begin{lemma}\label{lem:alpha invertibility}
% % Let $S = D^{-1/2}WD^{-1/2}$ and $\alpha \in (0,1)$, then $(I - \alpha S)$ is invertible.
% % \end{lemma}
% % \begin{proof}
% %     Each entry of $S$ is $S_{ij} = W_{ij}/\sqrt{d_{i} d_{j}} \in [0,1]$ for $i \neq j$, and $S_{ii} = 0$. Then, $(I - \alpha S)$ is symmetric and strictly diagonally dominant with positive diagonal elements, so it is positive definite and thus invertible.
% % \end{proof}

% \begin{lemma}\label{lem:lambda invertibility}
%     Let $G = (V, E)$ be a graph with $n$ vertices and $m$ weighted edges. Let $W$ be its adjacency matrix, $D = \text{diag}(W \vec{1}_n)$, $L = D-W$, $\Delta \in \{0,1\}^{n \times n}$ be a diagonal matrix where elements are 1 only if the index is in the labeled set, and $\lambda > 0$. Then, $(L + \lambda \Delta)$ is invertible if, for each connected component in $G$, there is at least one labeled node.
% \end{lemma}
% \begin{proof}
%     Let $x \in \mathbb{R}^n \backslash \{ 0\}$, then 
%     $$x^\top (L + \lambda \Delta)x = x^\top B^\top B x+ \lambda x^\top \Delta x \geq 0,$$
%     where $B \in \mathbb{R}^{m \times n}$ is incidence matrix defined by $$B_{e,v} = \begin{cases}
%     -\sqrt{W_e}, & \text{if $v$ is the initial vertex of edge $e$}\\
%     \sqrt{W_e}, & \text{if $v$ is the end vertex of edge $e$}\\
%     0, & \text{otherwise}.
%     \end{cases}$$ 
%     This shows that $(L + \lambda \Delta)$ is positive semi-definite, so it is invertible only when it is positive definite, or equivalently, $x^\top (L + \lambda \Delta)x \neq 0$. 
    
%     Consider when $x^\top (L + \lambda \Delta) x = 0$, then $x^\top B^\top B x = 0$ and $\lambda x^\top \Delta x = 0$. For $x^\top B^\top B x = (Bx)^\top (Bx) = 0$, this means $$[Bx]_e = \sqrt{W_e}(x_{e_2} - x_{e_1})=0, \ \  \text{for all edges $e$},$$
%     where $e_1, e_2$ are the initial and end vertices of edge $e$. This implies $x^\top B^\top Bx = 0$ only when $x_i = x_j$ for any $i, j$ that are connected. Let $g_1, ..., g_k \subseteq V$ represent the sets of vertices in each connected component of $G$, and $(a_j)_{i \in [k]}$ be a sequence of real numbers that are not all zero. For each $j \in [k]$, we assume $x_i = a_j$ for all $i \in g_j$. 
    
%     Under these assumptions on $x$, $x \neq \mathbf{0}$ and $x^\top B^\top Bx = 0$ always holds. We now consider when $\lambda x^\top \Delta x = 0$: 
%     $$\lambda x^\top \Delta x = \lambda\sum_{i=1}^n \Delta_{ii} x_i^2 = \lambda \sum_{j=1}^k a_j^2 \left(\sum_{i \in g_j} \Delta_{ii}\right) = \lambda \sum_{j \in [k], a_j \neq 0} a_j^2 \left(\sum_{i \in g_j} \mathbf{1}_{\text{label}}(i) \right) = 0.$$
%     Since $\lambda > 0$ and $\mathbf{x} \neq 0$, the equation only holds when $\sum_{i \in g_j} \mathbf{1}_{\text{label}}(i) = 0$ for some $g_j$, i.e. there exists some connected component such that none of its nodes are labeled.

%     We conclude that the matrix $(L + \lambda \Delta)$ is positive definite and thus invertible if there exists at least one labeled node in each connected component of $G$. 
% \end{proof}


\subsection{Proof of Lemma~\ref{lem:degree of determinant}}\label{appendix:proof of determinant}


\begin{proof}
    Using the adjugate matrix, we have
    $$C(x) = \frac{1}{\det(A+xB)}\text{adj}(A+xB).$$
    The determinant of $A+xB$ can be written as
    $$\det(A+xB) = \sum_{\sigma \in S_n}\left(\text{sgn}(\sigma) \prod_{i=1}^n [A+xB]_{i\sigma_i}\right),$$
    where $S_n$ represents the symmetric group and $\text{sgn}(\sigma) \in \{\pm 1\}$ is the signature of permutation $\sigma$. Thus $\det(A+xB)$ is a polynomial in $x$ of degree at most $n$. The adjugate of $A+xB$ is $$\text{adj}(A+xB) = K^\top,$$ where $K$ is the cofactor matrix of $A+xB$ (we write $K$ to avoid confusion with the inverse $C(x)$ above). By definition, each entry of $K$ is $K_{ij} = (-1)^{i+j}k_{ij}$ where $k_{ij}$ is the determinant of the $(n-1) \times (n-1)$ matrix that results from deleting the $i$-th row and $j$-th column of $A+xB$. Since every entry of $A+xB$ is a polynomial in $x$ of degree at most $1$, each $k_{ij}$, and thus each entry of $K$ and of $\text{adj}(A+xB)$, is a polynomial of degree at most $n-1$. Letting $Q(x) = \det (A+xB)$ and $P_{ij}(x) = [\text{adj}(A+xB)]_{ij}$ concludes our proof.
\end{proof}

\subsection{Proof of Lemma~\ref{lem: delta F form}}
\begin{proof}
    The $ij$-th element of $I - c \cdot S$ is 
\[[I - c \cdot S]_{ij} = \begin{cases}
    -c \cdot d_{i}^{-\delta}W_{ij}d_{j}^{\delta-1} = -(d_{i}^{-1}d_{j})^{\delta}(c \cdot W_{ij}d_{j}^{-1}) & \text{, if $i \neq j$}\\
    1=(d_{i}^{-1}d_{i})^\delta & \text{, otherwise.}
\end{cases}\]
Using adjugate matrix, we have
    $$(I-c\cdot S)^{-1} = \frac{1}{\det(I-c \cdot S)}\text{adj}(I-c\cdot S).$$
Note that the determinant of any $k \times k$ matrix $A$ can be written as 
$$\det(A) = \sum_{\sigma \in S_k}\left(\text{sgn}(\sigma) \prod_{i=1}^k [A]_{i\sigma_i}\right),$$
where $S_k$ represents the symmetric group and $\text{sgn}(\sigma) \in \{\pm 1\}$ is the signature of permutation $\sigma$. 

Now consider $\text{adj}(I-c \cdot S)$. Let $M_{ij}$ be the $(n-1) \times (n-1)$ matrix resulting from deleting $i$-th row and $j$-th column from $[I- c \cdot S]$. Then, 
$$[\text{adj}(I - c \cdot S)]_{ij} = (-1)^{i+j}\det(M_{ji}) = (-1)^{i+j}\sum_{\sigma \in S_{n-1}}\left(\text{sgn}(\sigma) \prod_{k=1}^{n-1} [M_{ji}]_{k\sigma_k}\right) = \sum_{\sigma \in S_{n-1}}\left(a_\sigma \exp(\delta\ln b_\sigma)\right),$$
for some constants $a_\sigma, b_\sigma$ (with the sign $(-1)^{i+j}\text{sgn}(\sigma)$ absorbed into $a_\sigma$) that satisfy

$$b_\sigma = (\prod_{k \in [n]\backslash \{j\}} d_{k}^{-1})(\prod_{k \in [n]\backslash\{i\}}d_{k}) = d_{i}^{-1}d_{j}.$$

We can then rewrite $[\text{adj}(I- c \cdot S)]_{ij}$ as
$$[\text{adj}(I- c \cdot S)]_{ij} = \sum_{\sigma \in S_{n-1}} (a_\sigma \exp(\delta \ln(d_{i}^{-1}d_{j}))) = a_{ij}\exp(\delta \ln(d_{i}^{-1}d_{j})),$$
where $a_{ij} = \sum_{\sigma \in S_{n-1}} a_\sigma$.
\end{proof}


\subsection{Proof of Lemma~\ref{lem:roots of exp sum}}
\begin{proof}
We proceed by induction on $n$. If $n=1$, then $f(x) = ae^{bx}$ with $a \neq 0$, so $f$ has no roots, i.e., $0 = n-1$ roots. Now assume that the statement holds for some $n=m$ and consider the case $n=m+1$. That is, we have
$$f(x) = \sum_{i=1}^{m+1} a_i e^{b_i x}.$$
Assume for the sake of contradiction that $f$ has $n = m+1$ roots. Define  
$$g(x) = \frac{f(x)}{e^{b_{m+1}x}} = \sum_{i=1}^m a_ie^{(b_i - b_{m+1})x} + a_{m+1},$$
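then $g$ also has $m+1$ roots, because $e^{b_{m+1}x} > 0$ for all $x$. Since $g$ is differentiable, Rolle's theorem gives a root of $g^\prime$ between every two consecutive roots of $g$, so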
$$g^\prime (x) = \sum_{i=1}^m (b_i - b_{m+1})a_ie^{(b_i - b_{m+1})x}$$
must have $m$ roots. However, using our induction hypothesis, it should have at most $m-1$ roots. This means our assumption is incorrect, i.e. $f$ must have at most $m = n-1$ roots. 

We conclude that $f$ must have at most $n-1$ roots. 
\end{proof}

\subsection{Proof of Theorem~\ref{thm:alpha upper bound}}\label{appendix:alpha}
\paragraph{Upper Bound.} The proof is given in \Cref{sec:label_prop}.
% Recall that the scoring matrix $F^* \in \mathbb{R}^{n \times c}$ has the following closed form 
% $F^* = (1-\alpha)(I-\alpha S)^{-1} Y,$
% where $S = D^{-1/2}WD^{-1/2}$ for the degree matrix $D$.
% By Lemma~\ref{lem:degree of determinant}, for a fixed problem instance, each $[F^*]_{ij}$ is a rational polynomial in $\alpha$ of the form $P_{ij}(\alpha) / Q(\alpha)$, where $P_{ij}$ and $Q$ are polynomials of degree $n$ and $n$, respectively. 

% Recall that the prediction on each node $i \in [n]$ is $\hat y_i = \text{argmax}_{j \in [c]}([F^*]_{ij})$, so the prediction on a node can change only when $\text{sign}([F^*]_{ij} - [F^*]_{ik})$ changes for some $j,k \in [c]$. Since $[F^*]_{ij} - [F^*]_{ik}$ is a rational polynomial $(P_{ij}(\alpha) - P_{ik}(\alpha)) / Q(\alpha)$ where $(P_{ij}(\alpha) - P_{ik}(\alpha))$ and $Q$ are degree of at most $n$, its sign can only change at most $O(n)$ times. This implies that the prediction on a single node can change at most ${c \choose 2}O(n) \in O(nc^2)$ times as $\alpha$ is varied.
% For $n$ nodes, this implies we have at most $O(mn^2c^2)$ distinct values of the loss function over the $m$ problem instances. The pseudo-dimension $m$ satisfies $2^m \leq O(mn^2c^2)$, which implies $\textsc{Pdim}(\mathcal{H}_{\alpha}) = O(\log n)$. \qed


\paragraph{Lower Bound.} We first construct the small connected component of $4$ nodes:
\begin{lemma} \label{lem:alpha connected components}
Given $x\in [1/\sqrt{2},1)$, there exists a labeling instance $(G, L)$ with $4$ nodes, such that the predicted label of the unlabeled points changes only at $\alpha = x$ as $\alpha$ varies in $(0,1)$.
\end{lemma}
\begin{proof}
We use binary labeling $a$ and $b$. We have two points labeled $a$ (namely $a_1, a_2$), and one point labeled $b$ (namely $b_1$) connected with both $a_1$ and $a_2$ with edge weight $1$. We also have an unlabeled point $u$ connected to $b_1$ with edge weight $x \geq 0$. That is, the affinity matrix and initial labels are $$W = \begin{bmatrix}
    0 & 1 & 1 & x\\
    1 & 0 & 0 & 0\\
    1 & 0 & 0 & 0\\
    x & 0 & 0 & 0
\end{bmatrix}, Y = \begin{bmatrix}
    1 & 0 \\
    0 & 1 \\
    0 & 1 \\
    0 & 0 
\end{bmatrix}.$$
Recall that the score matrix is $$F^* = (1-\alpha)(I -\alpha S)^{-1}Y .$$ 
We now calculate: 
\begin{align*}
    D^{-1/2} =& \begin{bmatrix}
    (x+2)^{-1/2} & 0 & 0 & 0\\
    0 & 1 & 0 & 0\\
    0 & 0 & 1 & 0\\
    0 & 0 & 0 & x^{-1/2}
\end{bmatrix},
\end{align*}
\begin{align*}
S = D^{-1/2}WD^{-1/2} =&
\begin{bmatrix}
0 & (x+2)^{-1/2} & (x+2)^{-1/2} & x^{1/2}(x+2)^{-1/2} \\
(x+2)^{-1/2} & 0 & 0 & 0\\
(x+2)^{-1/2} & 0 & 0 & 0\\
x^{1/2}(x+2)^{-1/2} & 0 & 0 & 0
\end{bmatrix},
\end{align*}
\begin{align*}
(I-\alpha S)^{-1} =& 
\frac{1}{\det(I-\alpha S)}\text{adj}(I - \alpha S)\\
=& \frac{1}{1-\alpha^2}\begin{bmatrix}
    1 & \frac{\alpha}{(x+2)^{1/2}}  & \frac{\alpha}{(x+2)^{1/2}}  & \frac{\alpha x^{1/2}} {(x+2)^{1/2}} \\
    \frac{\alpha}{(x+2)^{1/2}} & 1-\frac{\alpha^2(x+1)}{(x+2)} & \frac{\alpha^2}{x+2} & \frac{\alpha^2 x^{1/2}}{(x+2)}
    \\
    \frac{\alpha}{(x+2)^{1/2}} & \frac{\alpha^2}{x+2} &  1-\frac{\alpha^2(x+1)}{(x+2)} & \frac{\alpha^2 x^{1/2}}{(x+2)}
    \\
    \frac{\alpha x^{1/2}} {(x+2)^{1/2}} & \frac{\alpha^2 x^{1/2}}{(x+2)} & \frac{\alpha^2 x^{1/2}}{(x+2)} & 1-\frac{2\alpha^2}{x+2}
\end{bmatrix}.
\end{align*}
Recall that the prediction on the unlabeled point is $\hat y_4 = \text{argmax}F^*_4$, so we calculate 
\begin{align*}
\hat y_4 = \text{sign} (F^*_{4, 2} - F^*_{4, 1}) 
=& \text{sign}\left(
\frac{\alpha x ^{1/2}(2\alpha - (x+2)^{1/2})}
{(1+\alpha)(x+2) }\right)\\
=& \text{sign}\left(x ^{1/2}(2\alpha - (x+2)^{1/2})\right). \tag{since $\alpha \in (0,1)$ and $x \geq 0$}
\end{align*}
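Solving the equation $x ^{1/2}(2\alpha - (x+2)^{1/2}) = 0$, we see that the prediction changes only at $\alpha = \frac{(x+2)^{1/2}}{2}$. Writing $x'$ for the target value of $\alpha$ given in the lemma statement, we set the edge weight to $x = 4x'^2 - 2 \geq 0$ (nonnegative because $x' \geq 1/\sqrt{2}$), so that $\frac{(x+2)^{1/2}}{2} = x'$; then $\hat y_4 = 0$ when $\alpha < x'$ and $\hat y_4 = 1$ when $\alpha \geq x'$, which completes our proof.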
\end{proof}

\begin{lemma}\label{lem:alpha alternating sign}
Given integer $n > 1$ and a sequence of $\alpha$'s such that $0 < \alpha_0 < 1/\sqrt{2} \leq  \alpha_1 < \alpha_2 < ... < \alpha_n < 1$, there exists a real-valued witness $w>0$ and a problem instance of partially labeled $4n$ points, such that for $0 \leq i \leq n/2-1$, $l<w$ for $\alpha \in (\alpha_{2i}, \alpha_{2i+1})$, and $l>w$ for $\alpha \in (\alpha_{2i+1}, \alpha_{2i+2})$.
\end{lemma}
\begin{proof}
We create $n$ connected components using the previous lemma, with $x_i = \alpha_i$. Let the unlabeled point in the $i$-th component be $u_i$; then as $\alpha$ increases from $\alpha_{i-1}$ to $\alpha_i$, the predicted label of $u_i$ changes from $a$ to $b$. If the true labels of the sequence $u_i$ alternate, starting with $u_1$ labeled $a$, then the loss increases and decreases alternately as all the predicted labels turn to $b$ when $\alpha$ increases to $\alpha_n$. Specifically, as $\alpha$ increases to $\alpha_1$, the predicted label of $u_1$ changes from $a$ to $b$. Since its true label is $a$ and the predicted labels of the other $u_i$'s remain unchanged, our loss slightly increases to $l_{max}$. Then, as $\alpha$ increases to $\alpha_2$, the point $u_2$ gets correctly labeled as $b$ while all other nodes remain unchanged, which slightly decreases our loss back to $l_{min}$. The loss thus fluctuates between $l_{min}$ and $l_{max}$. We therefore set the witness $w$ to the midpoint of these two values:
$$w = 
\frac{l_{min}+l_{max}}{2}.$$
\end{proof}

We now finish the lower bound proof for Theorem~\ref{thm:alpha upper bound}.

\begin{proof}
Arbitrarily choose $n^\prime = n/4$ (assumed to be a power of 2 for convenient representation) real numbers $1/\sqrt{2} \leq \alpha_{[00\dots01]} < \alpha_{[00\dots10]} < \dots < \alpha_{[11\dots11]} < 1$. The indices are increasing binary numbers of length $m = \log n^\prime$. We create $m$ labeling instances that can be shattered by these $\alpha$ values. For the $i$-th instance $(X^{(i)}, Y^{(i)})$, we apply the previous lemma with the subset of the $\alpha_b$ sequence that corresponds to the $i$-th bit flip in $b$, where $b \in \{0,1\}^m$. For example, $(X^{(1)}, Y^{(1)})$ is constructed using $\alpha_{[10\dots0]}$, and $(X^{(2)}, Y^{(2)})$ is constructed using $\alpha_{[010\dots0]}$, $\alpha_{[100\dots0]}$ and $\alpha_{[110\dots0]}$. The lemma gives us both the instances and the sequence of witnesses $w_i$.

This construction ensures $\text{sign}(l_{\alpha_b}-w_i) = b_i$ for all $b \in \{0,1\}^m$. Thus the pseudo-dimension is at least $\log n^\prime = \log n - \log 4 = \Omega(\log n)$.
\end{proof}

\subsection{Proof of Theorem~\ref{thm:lambda upper bound}}\label{appendix:lambda upper bound}
\paragraph{Upper Bound.}
    The closed-form solution $F^*$ is given by 
    $$ F^*= (S + \lambda I_n \Delta_{i \in L})^{-1} \lambda Y.$$
    By Lemma~\ref{lem:degree of determinant}, each coefficient $[F^*]_{ij}$ is a rational function of $\lambda$ of the form $P_{ij}(\lambda) / Q(\lambda)$, where $P_{ij}$ and $Q$ are polynomials of degree at most $n$. Note that the prediction for each node $i \in [n]$ is $\hat{y}_i = \argmax_{j\in [c]} [F^*]_{ij}$, and thus the prediction on any node in the graph can only change when $\text{sign}([F^*]_{ij} - [F^*]_{ik})$ changes for some $j, k \in [c]$. Note that $[F^*]_{ij} - [F^*]_{ik}$ is also a rational function $(P_{ij}(\lambda) - P_{ik}(\lambda))/Q(\lambda)$, where both the numerator and denominator are polynomials in $\lambda$ of degree at most $n$, meaning the sign can change at most $O(n)$ times. As we vary $\lambda$, the prediction on a single node can therefore change at most $\binom{c}{2} O(n) \in O(n c^2)$ times. Across the $m$ problem instances and the $n$ total nodes, we have at most $O(n^2c^2m)$ distinct values of our loss function. The pseudo-dimension $m$ thus satisfies $2^m \leq O(n^2c^2m)$, or $m = O(\log n)$.

\paragraph{Lower Bound.} We construct the small connected component of $4$ nodes as follows:
\begin{lemma}\label{lem:lambda connected components}
Given $\lambda' \in (1, \infty)$, there exists a labeling instance $(X, Y)$ with $4$ nodes, such that the predicted label of the unlabeled points changes only at $\lambda = \lambda'$ as $\lambda$ varies in $(0,\infty)$.
\end{lemma}
\begin{proof}
We use binary labeling $a$ and $b$.  We have two points labeled $a$ (namely $a_1, a_2$), and one point labeled $b$ (namely $b_1$). We also have an unlabeled point $u$ connected to $b_1$ with edge weight $x \geq 0 $ and connected with both $a_1$ and $a_2$ with edge weight $1$. That is, the weight matrix and initial labels are $$W = 
\begin{bmatrix}
    0 & 0 & 1 & 0 \\
    0 & 0 & 1 & 0 \\
    1 & 1 & 0 & x \\
    0 & 0 & x & 0
\end{bmatrix}, Y = \begin{bmatrix}
    -1 \\
    -1 \\
    0 \\
    1 
\end{bmatrix}.$$

The closed form solution is $$F^* = (S + \lambda I_n \Delta_{i \in L})^{-1} \lambda Y $$
where $S = \text{diag}(W \vec{1}_n) - W $. We now calculate: 
\begin{align*}
S &=
\begin{bmatrix}
    1 & 0 & -1 & 0 \\
    0 & 1 & -1 & 0 \\
    -1 & -1 & x + 2 & -x \\
    0 & 0 & -x & x
\end{bmatrix}
\\
S + \lambda I_n \Delta_{i \in L} &= \begin{bmatrix}
    1 + \lambda & 0 & -1 & 0 \\
    0 & 1+ \lambda & -1 & 0 \\
    -1 & -1 & x + 2 & -x \\
    0 & 0 & -x & x+ \lambda
\end{bmatrix}
% \\
% (S + \lambda I_n \Delta_{i \in L})^{-1}
% &=\begin{bmatrix}
%     \frac{{\lambda^2x + 2\lambda^2 + 3\lambda x + \lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} \\
%     \frac{{\lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda^2x + 2\lambda^2 + 3\lambda x + \lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} \\
%     \frac{{\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda^2 + \lambda x + \lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda x + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} \\
%     \frac{{x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda x + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} & \frac{{\lambda x + 2\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}}
% \end{bmatrix}
% \\ f = (S + \lambda I_n \Delta_{i \in L})^{-1} \lambda Y &= \begin{bmatrix}
%     \frac{{\lambda x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} - \lambda\left(\frac{{\lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) - \lambda\left(\frac{{\lambda^2x + 2\lambda^2 + 3\lambda x + \lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) \\
%     \frac{{\lambda x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} - \lambda\left(\frac{{\lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) - \lambda\left(\frac{{\lambda^2x + 2\lambda^2 + 3\lambda x + \lambda + x}}{{\lambda^3x + 2\lambda^3 + 4\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) \\
%     -2\lambda\left(\frac{{\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) + \lambda\left(\frac{{\lambda x + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) \\
%     -2\frac{{\lambda x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}} + \frac{{\lambda x + 2\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}}
% \end{bmatrix}
\end{align*}
Recall that the prediction on the unlabeled point is $\hat y_3 = \text{sign} ([F^*]_{3})$, so we calculate 
\begin{align*}
\hat y_3 = \text{sign}([F^*]_{3})
=& \text{sign}\left(
-2\lambda\left(\frac{{\lambda + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) + \lambda\left(\frac{{\lambda x + x}}{{\lambda^2x + 2\lambda^2 + 3\lambda x}}\right) \right)\\
=& \text{sign}\left(-2\lambda(\lambda + x) + \lambda(\lambda x + x) \right) \tag{since $\lambda > 0$ and $x \geq 0$}
\\=& \text{sign}\left(-2(\lambda + x) + (\lambda x + x) \right) \tag{since $\lambda > 0$}
\\=& \text{sign}\left(-2\lambda - x + \lambda x  \right)
\end{align*}
Solving the equation $-2\lambda - x + \lambda x = 0$, we see that the prediction changes only at $\lambda = \frac{x}{x-2}$. Let $x = \frac{2\lambda'}{\lambda' - 1} > 2$ (well defined since $\lambda' > 1$), so that $\frac{x}{x-2} = \lambda'$; then $\hat y_3 = -1$ when $\lambda < \lambda'$ and $\hat y_3 = 1$ when $\lambda \geq \lambda'$, which completes our proof.
\end{proof}

% \begin{lemma}
% Given integer $n > 1$ and a sequence of $\lambda$'s such that $0 < \lambda_0 < 1 \leq  \lambda_1 < \lambda_2 < ... < \lambda_n < 1$, there exists a real-valued witness $w>0$ and a problem instance of partially labeled $4n$ points, such that for $0 \leq i \leq n/2-1$, $l<w$ for $\lambda \in (\lambda_{2i}, \lambda_{2i+1})$, and $l>w$ for $\lambda \in (\lambda_{2i+1}, \lambda_{2i+2})$.
% \end{lemma}
% \begin{proof}
% From the Lemma~\ref{lem:lambda connected components} we have $n$ connected components with $\lambda_i = \lambda'_i$. Let the unlabeled point in the $i$th component be $u_i$, then as $\lambda$ increases from $\lambda_{i-1}$ to $\lambda_i$, the predicted label of $u_i$ changes from $a$ to $b$. If the sequence $u_i$ is alternately labeled with $u_1$ labeled $a$, then the loss increases and decreases alternately as all the labels turn to $b$ when $\lambda$ increases to $\lambda_n$. Specifically, as $\lambda$ increases to $\lambda_1$, the point $u_1$ has predicted label changes from $a$ to $b$. Since its true label is $a$ and the predicted labels of other $u_i$'s remain unchanged, our loss slightly increases to $l_{max}$. Then, as $\lambda$ increases to $\lambda_2$, the point $u_2$ gets correctly labeled as $b$ and all other nodes unchanged, which slightly decreases our loss back to $l_{min}$. The loss thus fluctuates between $l_{min}$ and $l_{max}$. We therefore set the witness $w$ to between these values:
% $$w = 
% \frac{l_{min}+l_{max}}{2}.$$
% \end{proof}
The remainder of the proof is exactly the same as the proofs of Lemma~\ref{lem:alpha alternating sign} and the lower bound of Theorem~\ref{thm:alpha upper bound}, with $\alpha$ replaced by $\lambda$.

\subsection{Proof of Theorem~\ref{thm:delta upper bound}}\label{appendix:delta upper bound}
\paragraph{Upper Bound.}
Using \Cref{lem: delta F form}, we know that each entry of $F^*$ is
$$F^*_{ij}(\delta) = \frac{1}{\det(I- c \cdot S)}\sum_{k=1}^n [\text{adj}(I- c \cdot S)]_{ik}Y_{kj} = \frac{1}{\det(I- c \cdot S)}\sum_{k=1}^n (a_{ik}Y_{kj})\exp(\delta \ln (d_{i}^{-1}d_{k})).$$ 

Recall that the prediction on a node is made by $\hat y_i = \text{argmax}(F^*_i
)$, so the prediction changes only when 
\begin{align*}
    F^*_{ic_1} - F^*_{ic_2}
    &= \frac{1}{\det(I- c \cdot S)}\left(\sum_{k=1}^n (a_{ik}Y_{kc_1})\exp(\delta \ln (d_{i}^{-1}d_{k})) - \sum_{k=1}^n (a_{ik}Y_{kc_2})\exp(\delta \ln (d_{i}^{-1}d_{k}))\right) \\
    &= \frac{1}{\det(I- c \cdot S)}\left( \sum_{k=1}^n (a_{ik}(Y_{kc_1}-Y_{kc_2}))\exp(\delta \ln (d_{i}^{-1}d_{k}))\right)\\
    &= 0.
\end{align*} By Lemma~\ref{lem:roots of exp sum}, $F^*_{ic_1} - F^*_{ic_2}$ has at most $n-1$ roots, so the relative order of any fixed pair of classes $c_1, c_2$ on node $i$ can change at most $n-1$ times. As $\delta$ varies, the prediction on node $i$ can therefore change at most $\binom{c}{2} O(n) \in O(nc^2)$ times. For $n$ nodes and $m$ problem instances, this implies that we have at most $O(mn^2c^2)$ distinct values of the loss. The pseudo-dimension $m$ then satisfies $2^m \leq O(mn^2c^2)$, or $m = O(\log (nc))$.

\paragraph{Lower Bound.} We construct the small connected component as follows:

\begin{lemma} \label{lem:delta connected components}
Consider when $c > 1/2$. Given $x \in (0, \log(2c)/\log(2)]$, there exists a labeling instance $(G, L)$ with $4$ nodes, such that the predicted label of the unlabeled points changes only at $\delta = x$ as $\delta$ varies in $(0,1)$.
\end{lemma}

\begin{proof}
    We use binary labeling $a$ and $b$. We have two points labeled $a$ (namely $a_1, a_2$), and one point labeled $b$ (namely $b_1$) connected with both $a_1$ and $a_2$ with edge weight $1$. We also have an unlabeled point $u$ connected to $b_1$ with edge weight $x \geq 0$. That is, the affinity matrix and initial labels are $$W = \begin{bmatrix}
    0 & 1 & 1 & x\\
    1 & 0 & 0 & 0\\
    1 & 0 & 0 & 0\\
    x & 0 & 0 & 0
\end{bmatrix}, Y = \begin{bmatrix}
    1 & 0 \\
    0 & 1 \\
    0 & 1 \\
    0 & 0 
\end{bmatrix}.$$
Recall that the score matrix is $$F^* = (I - c \cdot S)^{-1}Y ,$$ 
where $S = D^{-\delta}WD^{\delta - 1}$ and $D$ is diagonal with $D_{ii} = \sum_j W_{ij}$. We now calculate: 
\begin{align*}
S = D^{-\delta}WD^{\delta - 1} =&
\begin{bmatrix}
0 & (x+2)^{-\delta} & (x+2)^{-\delta} & x^{\delta}(x+2)^{-\delta} \\
(x+2)^{\delta-1} & 0 & 0 & 0\\
(x+2)^{\delta-1} & 0 & 0 & 0\\
x^{1-\delta}(x+2)^{\delta-1} & 0 & 0 & 0
\end{bmatrix},
\end{align*}
\begin{align*}
\det(I-c \cdot S) 
&= \det \begin{bmatrix}
    1 & -c(x+2)^{-\delta} & -c(x+2)^{-\delta} & -cx^{\delta}(x+2)^{-\delta} \\
    -c(x+2)^{\delta-1} & 1 & 0 & 0\\
    -c(x+2)^{\delta-1} & 0 & 1 & 0\\
    -cx^{1-\delta}(x+2)^{\delta-1} & 0 & 0 & 1
\end{bmatrix} \\
&= 1-c^2 \neq 0,
\end{align*}
so $(I-c\cdot S)$ is invertible on our instance.

Recall that the prediction on the unlabeled point is $\hat y_4 = \text{argmax}F^*_4$, so we calculate 
\begin{align*}
\hat y_4 = \text{sign} (F^*_{4, 2} - F^*_{4, 1}) 
= \text{sign}\left(
\frac{c \cdot x ^{1-\delta}(2c - (x+2)^{\delta})}
{(1-c^2)(x+2) }\right)
= \text{sign}\left(2c - (x+2)^\delta\right). \tag{since $c \in (0,1)$, and $x \geq 0$}
\end{align*}
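Solving the equation $2c-(x+2)^\delta = 0$, we see that the prediction changes only at $\delta = \frac{\ln(2c)}{\ln(x+2)}$. Writing $x'$ for the target value of $\delta$ given in the lemma statement, and noting that the construction requires $x' \leq \ln(2c)/\ln(2) \leq 1$, we can set the edge weight to $x = \left(2c\right)^{1/x'} - 2 \geq 0$, so that $\frac{\ln(2c)}{\ln(x+2)} = x'$. Since $2c - (x+2)^\delta$ is positive exactly when $\delta < x'$, we get $\hat y_4 = 1$ when $\delta \leq x'$ and $\hat y_4 = 0$ when $\delta > x'$, which completes our proof.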
\end{proof}
