\section{Additional Subroutines from Fischer \etal}
\label{sec:additional_subroutines}

For reasons of formatting and convenience, we include in this section all of the coloring subroutines referred to in \Cref{algo:Delta-main} and the proof of \Cref{lem:high_degree_delta_col}.

\begin{algorithm}[H]
\caption{{\sc SlackGeneration} (node $v$)} \label{algo:slack}

Activate $v$ w.p. $1/20$ and choose a color $c_v$ from $[\Delta]$.

If $v$ is activated and none of the (activated) neighbors of $v$ have chosen $c_v$ to try, then color $v$ with color $c_v$.
\end{algorithm}

\begin{algorithm}[H]
 \caption{{\sc Color-$V_*$}} \label{algo:color-v-star}


Color $V_*$ by calling the (Deg+1)-List-coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).



\end{algorithm}

\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{O}$}} \label{algo:color-o}
 Let $\mathcal{X}$ denote the set of (uncolored) nodes in ordinary almost-cliques that have unit slack.

Color $\mathcal{O} \setminus \mathcal{X}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).

Color $\mathcal{X}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).

\end{algorithm}

\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{R}$}} \label{algo:color-r}
 For each runaway almost-clique $C$, let $\mathcal{Y}_C$ be the set of neighbors of the escape node $e_C$ in $C$. Let $\mathcal{Y}$ denote the union of all $\mathcal{Y}_C$'s.

 Color $\mathcal{R} \setminus \mathcal{Y}$, by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).


 Color $\mathcal{Y}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).
 

\end{algorithm}



\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{N}$}} \label{algo:color-n}
 A nice almost-clique $C$ is of \emph{type 1} if it contains a special node (protector or escape node). A nice almost-clique is of \emph{type 2} if it is a clique and has no special node. A nice almost-clique is of \emph{type 3} if it has a non-edge. Let $\mathcal{N}_1$, $\mathcal{N}_2$, and $\mathcal{N}_3$ be the sets of nodes in type 1, type 2, and type 3 nice almost-cliques, respectively.

 {\sc Color-$\mathcal{N}_1$} (Algorithm~\ref{algo:color-n-1}): colors $\mathcal{N}_1 \setminus (\mathcal{P}\cup \mathcal{E})$.

  {\sc Color-$\mathcal{N}_2$} (Algorithm~\ref{algo:color-n-2}): colors $\mathcal{N}_2$.

   {\sc Color-$\mathcal{N}_3$} (Algorithm~\ref{algo:color-n-3}): colors $\mathcal{N}_3$.

\end{algorithm}


\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{N}_1$}} \label{algo:color-n-1}
 For a type 1 nice almost-clique $C$, let $\mathcal{Z}_C$ denote the neighbors of the special node of $C$ in $C$. Let $\mathcal{Z}_1$ denote the union of all $\mathcal{Z}_C$'s.

 Color $\mathcal{N}_1 \setminus (\mathcal{Z}_1 \cup \mathcal{P} \cup \mathcal{E})$, by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).


 Color $\mathcal{Z}_1$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}). 


\end{algorithm}


\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{N}_2$}} \label{algo:color-n-2}
 For a type 2 nice almost-clique $C$, let $v_C$ be an arbitrary simplicial node in $C$. Let $\mathcal{Z}_2$ denote the set of all such $v_C$'s.

Color $\mathcal{N}_2 \setminus \mathcal{Z}_2$, by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).

Color $ \mathcal{Z}_2$, by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).


\end{algorithm}
\newpage
\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{N}_3$}} \label{algo:color-n-3}
 For a type 3 nice almost-clique $C$, consider a pair $\{x_C,y_C\}$ that is a non-edge. Let $P$ be the set of all such pairs. Make a virtual graph $H$ with $P$ as the node set, in which there is an edge between $\{x_{C_1},y_{C_1}\}$ and $\{x_{C_2},y_{C_2}\}$ if a node of $\{x_{C_1},y_{C_1}\}$ is a neighbor of a node of $\{x_{C_2},y_{C_2}\}$. The palette of a node $\{x_C,y_C\}$ is the intersection of the current palettes of the nodes $x_C$ and $y_C$.

 Color $H$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}). For a pair $\{x_C,y_C\}$ in $P$, assign the color of $\{x_C,y_C\}$ to both $x_C$ and $y_C$.

  For a type 3 nice almost-clique $C$, let $\mathcal{I}_C$ denote the set of common neighbors of $x_C$ and $y_C$ in $C$, and $\mathcal{I}$ be the union of all $\mathcal{I}_C$'s.

 Color $\mathcal{N}_3\setminus \mathcal{I}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}). 

  Color $\mathcal{I}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).

 


\end{algorithm}

\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{GP}$}} \label{algo:color-g}
 For a guarded almost-clique $C$, consider the protector node $p_C$ and a non-neighbor $u_C$ of $p_C$ in $C$, and form a pair $\{u_C,p_C\}$. Let $Q$ be the set of all such pairs. Make a virtual graph $H$ with $Q$ as the node set, in which there is an edge between $\{u_{C_1},p_{C_1}\}$ and $\{u_{C_2},p_{C_2}\}$ if a node of $\{u_{C_1},p_{C_1}\}$ is a neighbor of a node of $\{u_{C_2},p_{C_2}\}$. The palette of a node $\{u_C,p_C\}$ is the intersection of the palettes of the nodes $u_C$ and $p_C$.

  Color $H$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}). For a pair $\{u_C,p_C\}$ in $Q$, assign the color of $\{u_C,p_C\}$ to both $u_C$ and $p_C$.

  For a guarded almost-clique $C$, let $\mathcal{J}_C$ denote the set of common neighbors of $u_C$ and $p_C$ in $C$, and $\mathcal{J}$ be the union of all $\mathcal{J}_C$'s.

 Color $\mathcal{G}\setminus \mathcal{J}$ by calling  the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).

  Color $\mathcal{J}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).

\end{algorithm}


\begin{algorithm}[H]
 \caption{{\sc Color-$\mathcal{E}$}} \label{algo:color-e}
  Color $\mathcal{E}$ by calling the (Deg+1)-List-Coloring algorithm (Algorithm~\ref{alg:LSColorReduce}).
\end{algorithm}
\section{Introduction}
\label{sec:intro}

Graph coloring is one of the most fundamental problems in algorithms which has been studied extensively in various settings for several decades. It has been playing an especially prominent role in distributed and parallel computing, not only because of its numerous applications, but also since some variants of coloring problems naturally model typical symmetry breaking problems, as frequently encountered in decentralized systems (see, e.g., \cite{BE13} for an overview of early advances).
Parallel graph coloring has been studied since the 1980s \cite{karloff1985fast}, and nowadays $(\Delta+1)$-coloring and $(2\Delta-1)$-edge-coloring are considered among the most fundamental graph problems in the area (here and throughout the paper, $\Delta$ refers to the maximum degree of the input graph).

In this paper, we study the parallel complexity of a natural generalization of the $(\Delta+1)$-coloring problem\footnote{The $(\Delta+1)$-coloring problem is to color a graph of maximum degree $\Delta$ using $\Delta+1$ colors.}, the problem of \emph{(degree+1)-list coloring} (\DILC). In the \DILC problem, for a given undirected graph $G = (V,E)$, each node has an input palette of acceptable colors of size one more than its degree, and the objective is to find a proper coloring using these palettes. While it is easy to design a simple linear-time (sequential) greedy algorithm for \DILC, the parallel and distributed complexity of \DILC is less well understood. Clearly, the problem is not easier than the $(\Delta+1)$-coloring problem and its variant the $(\Delta + 1)$-list coloring problem%
\footnote{In $(\Delta + 1)$-list coloring each node has a palette of $\Delta+1$ many colors at its disposal (rather than $\deg+1$ many, as in~\DILC).}, but the challenge of dealing with nodes having color palettes of greatly different sizes seems to make the problem significantly more difficult. Still, it has been observed that by using techniques developed in \cite{FHK16,Kuhn20}, one can deterministically reduce \DILC to $(\Delta + 1)$-list coloring with only an $O(\log\Delta)$ multiplicative and $O(\log^*n)$ additive overhead in the running time (number of rounds). However, the logarithmic complexity gap is still significant and until very recently, this gap has been elusive for the most efficient distributed and parallel algorithms for \DILC. The first advance (in the distributed setting) has come only very recently, when Halld{\'o}rsson, Kuhn, Nolin, and Tonoyan \cite{hknt_local_d1lc} presented a randomized $O(\log^3\log n)$-rounds distributed algorithm for \DILC in the \LOCAL distributed model, matching the state-of-the-art complexity for the simpler $(\Delta+1)$-coloring problem due to Chang, Li, and Pettie \cite{CLP20}. In another very recent work, Halld{\'o}rsson, Nolin, and Tonoyan \cite{HNT22} extended the framework and showed that \DILC can be solved
in $O(\log^5\log n)$-rounds in the distributed \CONGEST model, matching the state-of-the-art complexity for the simpler $(\Delta+1)$-coloring problem in \CONGEST by Halld{\'{o}}rsson, Kuhn, Maus, and Tonoyan~\cite{HKMT21}.

In this paper we demonstrate that the \DILC problem can be efficiently solved in the \emph{Massively Parallel Computation} (\MPC) model with sublinear local space, matching the complexity of the state-of-the-art \MPC algorithms for the simpler $(\Delta+1)$-coloring and $(\Delta+1)$-list coloring problems.

\paragraph{MPC model}
The \emph{Massively Parallel Computation} (\MPC) model, introduced over a decade ago by Karloff \etal \cite{KSV10}, is a nowadays standard theoretical model for parallel algorithms. The model has been developed on the basis of its successful modeling of parallel and distributed frameworks such as MapReduce \cite{mapreduce}, Hadoop \cite{hadoop}, Dryad \cite{dryad}, and Spark \cite{spark}, and it shares many similarities to classical models of parallel computation (e.g., PRAM) and distributed models (e.g., \CONGESTEDC). In this paper, we focus on the \emph{low-space/sublinear} MPC regime, in which machines have local space $\lspace = O(n^{\phi})$ for any arbitrary constant $\phi \in (0,1)$, where $n$ is the number of nodes in the graph. This model has attracted a lot of attention recently, see, e.g., \cite{ANOY14,ASSWZ18,BKS17,BHH19,BKM20,BBDFHKU19,CC22,CFGUZ19,CDP20,CDP21,CDPsparse,CLMMOS18,GGKMR18,GKU19,GU19,GSZ11,LMOS20}. Recent works have provided many algorithms for fundamental graph problems such as connectivity, approximate matching, maximal matching, maximal independent set, and $(\Delta+1)$ coloring.

It is known that the $(\Delta+1)$-coloring algorithm due to Chang \etal \cite{CFGUZ19} can be combined with the network decomposition result of \cite{RG20} to obtain a randomized $O(\log\log\log n)$-round \MPC algorithm, which is currently the state-of-the-art bound for $(\Delta+1)$-list-coloring. Recently, Czumaj \etal \cite{CDP21} showed that this result can be derandomized, even for the list-coloring version of the problem. In this paper we show that these bounds can be matched on a sublinear local space \MPC for the more general \DILC problem.

We first show how to combine the \DILC framework for the \LOCAL model due to Halld{\'o}rsson \etal \cite{hknt_local_d1lc} with the techniques developed in earlier works on the \MPC model, to obtain a randomized \MPC algorithm for \DILC working in $O(\log\log\log n)$ rounds, w.h.p.
Then we present a more challenging part (our main contribution) and show that the $O(\log\log\log n)$-rounds randomized \MPC algorithm can be efficiently derandomized in the \MPC model.

\begin{theorem}[\textbf{Main}]
\label{thm:main:deter}
Let $\spacexp \in (0,1)$ be an arbitrary constant. There exists a deterministic algorithm that, for every $n$-node graph $G=(V,E)$, solves the \DILC problem using $O(\log\log\log n)$ rounds, in the low-space \MPC model with local space $\lspace = O(n^{\spacexp})$ and global space $O(m+n^{1+\spacexp})$.
\end{theorem}

Observe that the bound in \Cref{thm:main:deter} matches the state-of-the-art bound for the complexity of the simpler $(\Delta+1)$-coloring problem in the (low-space) \MPC model (see \cite{CFGUZ19} for the randomized bound and \cite{CDP21} for the deterministic bound). Furthermore, the recently developed framework connecting the complexity of \LOCAL and low-space \MPC algorithms (see \cite{GKU19,CDPcompstab}), provides some evidence that our upper bound cannot be asymptotically improved, unless the complexity of the $(\Delta+1)$-coloring problem is
$(\log\log n)^{o(1)}$
in the \LOCAL model. This is because \cite{GKU19,CDPcompstab} show that for a class of component stable algorithms and conditioned on the so-called 1-vs-2-cycles conjecture, no low-space \MPC algorithm can run faster than the logarithm of the complexity of \LOCAL algorithms. (Still, even conditioned on the 1-vs-2-cycles conjecture, it might be conceivable that a non-component stable randomized \MPC low-space algorithm can solve $(\Delta+1)$-coloring in $o(\log\log\log n)$ rounds, and further, we do not have any good enough \LOCAL lower bounds for coloring, and so maybe an
$(\log\log n)^{o(1)}$-rounds
\LOCAL algorithm is possible.)
Finally, notice that Roughgarden \etal \cite{RVW18} showed that proving any super-constant lower bound in the low-space \MPC for any problem in \model{P} would separate $\model{NC}^1$ from \model{P}, making any \emph{unconditional} super-constant (low-space \MPC) lower bound unlikely.

As an application of our framework for \DILC developed in \Cref{thm:main:deter}, we consider the related problem of $\Delta$-coloring graphs of maximum degree $\Delta \ge 3$ that contain no clique of size $\Delta+1$ (cf.\ Brooks' theorem \cite{Brooks41}). Recently, Fischer, Halld{\'o}rsson, and Maus \cite{FHM23} devised a general approach that allows one to reduce in a constant number of rounds (in \LOCAL, \CONGEST, and \CONGESTEDC) the $\Delta$-coloring problem to a constant number of instances of (degree+1)-list coloring, assuming $\Delta = \omega(\log^3 n)$ (for the \CONGESTEDC algorithm $\Delta = \omega(\log^{4+\varepsilon} n)$ is required, for a small constant $\varepsilon$). This was then extended to obtain a randomized $O(\text{poly}(\log\log n))$-rounds \LOCAL algorithm to $\Delta$-color any graph of maximum degree $\Delta \ge 3$ that does not contain a clique of size $\Delta+1$. In this paper we obtain a similar result for the \MPC model.

\begin{theorem}
\label{thm:delta_coloring}
Let $\spacexp \in (0,1)$ be an arbitrary
constant. There exists a randomized algorithm that for any $n$-node graph $G = (V,E)$ with maximum degree $\Delta \ge 3$ that does not contain a $(\Delta+1)$-clique, finds a $\Delta$-coloring of $G$ in $O(\log\log\log n)$ rounds on the low-space \MPC model with local space $\lspace =O(n^{\spacexp})$ and global space $O(m+n^{1+\spacexp})$.
\end{theorem}

In this paper, our main focus is on the proof of \Cref{thm:main:deter} and we defer its application to the proof of \Cref{thm:delta_coloring} to \Cref{sec:Delta-coloring}.










\subsection{Setting and notation}
\label{sec:setting}




For $k \in \mathbb{N}$, $[k]$ denotes the set $\{1,\ldots,k\}$. For $a,b \in \mathbb{N}$, $[a,b]$ denotes the set of integers in $\{a,a+1,\ldots,b\}$. We consider a graph
$G=(V,E)$ with $V$ as the node set and $E$ as the edge set with $|V|=n$ and $|E|=m$.
The set of neighbors of a node $v$ is denoted by $N(v)$ and the degree of a node $v$ is denoted by $d(v)$. For a node $v$, $\Psi(v)$ denotes the list of colors in the color palette of node $v$ and $p(v)$ denotes the size of $\Psi(v)$. The maximum degree of any node in $G$ is denoted by $\Delta$. As we go on coloring the nodes of the graph $G$, the graph will change and the color palettes of the nodes will also change. Often, we denote the current (rather than the input) graph by $G$. For all graphs we consider, we have $p(v)\geq d(v)+1$. For a subset $X \subseteq V$, $G[X]$ denotes the subgraph induced by $X$ and $m(X)$ denotes the number of edges in $G[X]$.

\paragraph{Degree+1 list coloring (D1LC)}


In the \textbf{degree+1 list coloring (D1LC) problem}, for a given graph $G = (V,E)$ and given color palettes $\Psi(u)$ assigned to each node $u \in V$ such that $\size{\Psi(u)} \geq d(u)+1$, the objective is to find a proper coloring of the nodes in $G$ such that each node is assigned a color from its color palette (and, as in any proper coloring, no edge in $G$ is monochromatic).

\paragraph{Massively Parallel Computation model.}
We consider the \emph{Massively Parallel Computation} (\MPC) model, which is a parallel system with some number of machines, each of them having some local space~\lspace.
At the beginning of computation, each machine receives some part of the input, with the constraint that it must fit within its local space. In our case, for the \DILC problem, the input is a set of $n$ nodes, $m$ edges, and $n$ color palettes of total size $O(n+m)$. Hence we will require that the number of machines is $\Omega(\tfrac{n+m}{\lspace})$, for otherwise the input would not fit the system. The computation on an \MPC proceeds in synchronous rounds. In each round, each machine processes its local data and performs an arbitrary local computation on its data without communicating with other machines. At the end of each round, machines can exchange messages. Each message is sent only to a single machine specified by the machine that is sending the message. All messages sent and received by each machine in each round have to fit into the machine's local space. Hence, the total number of messages sent by any machine and received by any machine is bounded by \lspace, and the total amount of communication across the whole \MPC is bounded by \lspace times the number of machines. At the beginning of the next round, each machine can process all messages received in the previous round. When the algorithm terminates, machines collectively output the solution.


Observe that if a single machine can store the entire input, then any problem (like, e.g., \DILC) can be solved in a single round, since no communication is required. In order for our algorithms to be as scalable as possible, normally one wants to consider graph problems in the low-space regime, where local space $\lspace = n^{\spacexp}$ for any given constant $\spacexp \in (0,1)$. (There has been some research considering also the case when $\lspace = \Theta(n)$, or even when $\lspace = n^{1+\spacexp}$ (in which case one wants to study the case that $\lspace \ll m$) but we will not consider such setting in the current paper.) We will require that the number of machines is not significantly more than required,
specifically that it is $\widetilde{O}(n+\frac{m}{\lspace})$ (note that the optimal amount would be $\widetilde{O}(\frac{n+m}{\lspace})$, but our algorithm requires the ability to assign a machine to each node).
A major challenge in the design of \MPC algorithms in the low-space regime is that the local space of each machine is (possibly) not sufficient to store all the edges incident to a single node. This constraint naturally requires an \MPC algorithm to rely on extensive communication between machines, and most of the techniques known are based on some graph sparsification. It is important to note here that even in the low-space regime, the \MPC model is known \cite{GSZ11} to be stronger than the PRAM model, e.g., it is known that sorting\footnote{Here we consider sorting of $N$ objects on an \MPC with local space $N^{\gamma}$ and on $N^{1-\gamma}$ machines, for any constant $\gamma>0$.} (and in fact, many related tasks, like prefix sum computation) can be performed in a constant number of rounds, even deterministically, see \cite{GSZ11}. Observe that with this tool, we can gather nodes' neighborhoods to contiguous blocks of machines, and learn their degrees, in a constant number of rounds, and that we can assume, without loss of generality, that the input can be distributed arbitrarily on the first $\Theta(\tfrac{n+m}{\lspace})$ machines on an \MPC.

\subsection{Technical contribution}

While the \MPC model is notionally stronger than \LOCAL, writing \MPC algorithms which work in an exponentially lower number of rounds than their \LOCAL counterparts is often challenging, due to the limited local space of each machine in the \MPC model. In particular, nodes of high degree often cannot hope to collect their $2$-hop neighborhood in the \MPC model, and this renders many common subroutines in \LOCAL algorithms challenging (for example, in the setting of coloring, computing an almost-clique decomposition).

We overcome this technical difficulty using a deterministic recursive sparsification approach similar to \cite{CDP20,CDP21} where we repeatedly partition an instance of \DILC with maximum degree $\Delta$ into $n^\delta$ \DILC instances, each with maximum degree $\Delta / n^\delta$. Here $\delta \in (0,1)$ is a constant less than $\phi$ to be fixed later and $\phi$ is our local space parameter, i.e., $s=O(n^\phi)$. All but one of these instances are \emph{valid} \DILC instances and so can be solved immediately (using this recursive sparsification if the degree is still too high), and the final instance can only be solved when it is determined which colors are unused in the other instances. In this way, we can reduce the maximum degree of the \DILC instances which we have to solve to an arbitrarily small polynomial in $n$.

For \DILC instances with degrees between $\log^7 n$ and some arbitrarily small polynomial $n^\delta$, we show that a recent algorithm of \cite{hknt_local_d1lc} can be efficiently simulated in the \MPC setting using information only in the $2$-hop neighborhood of each node. Then, we show that the randomized subroutines from the algorithm of \cite{hknt_local_d1lc} can be efficiently derandomized, as they all satisfy a particular set of properties. The derandomization in this paper uses pseudorandom generators (PRGs) combined with the method of conditional expectations. The main difficulty is that PRGs restricted by the local space requirement of low-space \MPC will fail on some non-trivial proportion of nodes even if the base randomized procedure succeeds with high probability. Furthermore, in general this can cause a chain reaction of nodes failing to meet the success requirements of the randomized procedure. To overcome this difficulty, we formalize in \Cref{prng-derandomizable} a collection of properties that allow us to fully derandomize a procedure using the PRG, which we prove in \Cref{lem:fullderand}. This lemma may be of independent interest as a powerful black-box derandomization technique in \MPC.

For instances with degree lower than $\log^7 n$, we can use an existing algorithm of Czumaj \etal \cite{CDP21}.

\paragraph{$\Delta$-coloring.}
The \DILC problem has been studied extensively in distributed and parallel setting largely thanks to its use in the designing of algorithms for other coloring problems. For example, \DILC has been frequently used as a subroutine in $(\Delta + 1)$-coloring algorithms (in the post-shattering step). Fischer \etal \cite{FHM23} observed recently that one can solve the $\Delta$-coloring problem using algorithms for \DILC: the approach due to Fischer \etal relies on a reduction from $\Delta$-coloring to a constant number of D1LC instances. We combine our \DILC \MPC algorithm from \Cref{thm:main:deter} with the reduction from \cite{FHM23} to obtain an $O(\log\log\log n)$-rounds algorithm for the $\Delta$-coloring problem (\Cref{thm:delta_coloring}). In particular, we show that the delicate almost-clique decomposition and classification required in the $\Delta$-coloring algorithm of Fischer \etal \cite{FHM23} can be performed in $O(1)$ rounds of sublinear \MPC. While the \CONGEST result of Fischer \etal \cite{FHM23} holds only for $\Delta = \omega(\log^3 n)$, our result does not require this restriction (i.e., holds for all values of $\Delta \geq 3$ provided the graph is $\Delta$-colorable, that is, provided it does not contain a $(\Delta+1)$-clique), exploiting the ability of \MPC to perform ``graph exponentiation'' when the maximum degree is low. (Because of space constraints, the analysis is deferred to \Cref{sec:Delta-coloring}.)

\subsection{Related work}

Our work continues a long line of research studying the parallel and distributed computing of graph coloring problems. For the references to earlier work on distributed coloring algorithms we refer to the monograph by Barenboim and Elkin \cite{BE13} (see also the influential papers by Linial \cite{Linial87,Linial92}). We will discuss here only more recent advances (and final results) for the four most relevant coloring problems, $(\Delta+1)$-coloring, $(\Delta+1)$-list-coloring, \DILC, and $\Delta$-coloring, leaving more detailed discussion and more references to the papers listed below. After extensive research in distributed computing for the $(\Delta+1)$-coloring problem, we understand its complexity for the \LOCAL, \CONGESTEDC, and also for the \MPC model, quite well. For \CONGESTEDC (and also for \MPC with linear memory, $\lspace = O(n)$), we know now how to solve $(\Delta+1)$-coloring in a constant number of rounds, see \cite{chang2018optimal,CDP20}. For the \LOCAL model, after a very long line of research, the current state of the art upper bound for randomized algorithm is by Ghaffari and Kuhn \cite{GK21} (see also \cite{RG20}), which achieves $O(\log^3\log n)$ round complexity w.h.p. There is no deterministic $o(\log n)$-round \LOCAL algorithm known. 

For the low-space \MPC, it is known that the $(\Delta+1)$-coloring algorithm due to Chang \etal \cite{CFGUZ19} can be combined with the network decomposition result of \cite{RG20} to obtain a randomized $O(\log\log\log n)$-round \MPC algorithm, which is currently the state-of-the-art bound for the problem; this result was derandomized by Czumaj \etal \cite{CDP21}. Furthermore, all algorithms mentioned above for $(\Delta+1)$-coloring can be extended to solve also $(\Delta+1)$-list-coloring.

For the \DILC problem, which is a generalization of $(\Delta+1)$-coloring and $(\Delta+1)$-list-coloring, there have not been many comparable bounds until the very recent work of Halld{\'o}rsson \etal \cite{hknt_local_d1lc}. In \cite{hknt_local_d1lc}, by significantly extending the earlier approaches for $(\Delta+1)$-coloring (in particular, to allow to efficiently deal with nodes of various degrees), it is shown that \DILC can be solved in $O(\log^3\log n)$ rounds in \LOCAL, w.h.p.
As a byproduct, the framework of Halld{\'o}rsson \etal \cite{hknt_local_d1lc} can be incorporated into a constant-round \MPC algorithm assuming the local \MPC space is slightly \emph{superlinear}, i.e., $O(n \log^4n)$ \cite[Corollary 2]{hknt_local_d1lc}.
A similar bound has been recently obtained for
the \CONGEST model in \cite{HNT22}, solving \DILC in $O(\log^5\log n)$ \CONGEST rounds, w.h.p.
We make extensive use of the framework laid out by Halld{\'o}rsson \etal \cite{hknt_local_d1lc} in their algorithm for \LOCAL in the design of our \DILC algorithm.

Our work relies also on some sparsification and derandomization techniques developed for parallel and distributed coloring algorithms.
With respect to derandomization,
our techniques are closely related to the recent papers by Czumaj \etal \cite{CDP20,CDP21}. The central approach underlying our analysis of derandomization using a pseudorandom generator has been used extensively before, though in the context of \MPC algorithms it has been used only recently in \cite{CDP21}.

In comparison to the perhaps more famous coloring problems listed above, relatively little has been written about the problem of $\Delta$-coloring in the parallel and distributed setting. In the \LOCAL model, the problem was first discussed in a paper by Panconesi and Srinivasan \cite{local_nature_delta_coloring}, and that algorithm was recently revisited and improved by Ghaffari \etal \cite{DBLP:journals/dc/GhaffariHKM21}.
In distributed models with communication bandwidth restrictions (and so, the setting closer to the model we consider here), the first breakthrough came only recently, in a paper by Fischer, Halld{\'o}rsson, and Maus \cite{FHM23}. They gave a \CONGEST algorithm which gives a $\Delta$-coloring of the graph in $O(\poly \log \log n)$ rounds if $\Delta = \omega(\log^3 n)$, which decreases to $O(\log^* n)$ rounds if $\Delta = \omega(\log^{21} n)$. Their result for large values of $\Delta$ was a substantial improvement over the state-of-the-art in any distributed model ($O(\log n)$ rounds previously being the best known), and combined with the result given by \cite{DBLP:journals/dc/GhaffariHKM21} for sub-polylogarithmic values of $\Delta$, they also achieved a $O(\poly\log\log n)$ round algorithm for $\Delta$-coloring in the \LOCAL model.

We are not aware of any direct prior work on $\Delta$-coloring in any model of parallel computing. An $O(\log n)$ round algorithm for \PRAM follows straightforwardly from the $O(\log n)$-round MIS algorithm of Luby \cite{Luby86}: one can find an MIS and defer coloring those nodes, the remaining nodes all gain at least $1$ slack, and therefore form a $(\Delta+1)$-coloring instance. This algorithm can be transformed into a $O(\log n)$ round \MPC algorithm, using well-known results relating to the simulation of \PRAM algorithms in \MPC. We note that the complexity of our algorithm represents a double-exponential improvement over this.

\hide{\subsection*{Notations}

\begin{itemize}
\item $G(V,E)$-- Graph with $n$ nodes.

\item $d_{G}(v)$--- The degree of $v$ in $G$.

\item $N_{G}(v)$--- The neighborhood of $v$ in $G$.

\item $G[W]$--- The subgraph of $G$ induced by $W \subseteq V$.

\item $O(n^\spacexp)$--- The space bound of each machine \peter{I changed this because the parts from \cite{CDP20} already use $\delta$ for something else, and we'll need to compare $\delta$ and $\eps$ later.}
\end{itemize}
}

\input{parts/preliminaries}

\input{parts/d1lc_randomised}

\input{parts/d1lc_deterministic}


\input{parts/Coloring-delta}

\bibliographystyle{alpha}

\section{Proof of Lemma~\ref{lem:derandomizable-subroutines}}
\label{sec:proof-of-4-2}

We conclude with the proof of \Cref{lem:derandomizable-subroutines}: that the randomized coloring subroutines from the algorithm of Halld{\'o}rsson \etal are normal $(O(1),\Delta_G)$-round distributed procedures.

\begin{proof}
First, we note that at the start of any algorithm, nodes may be assumed to have information about which sets they are members of ($V_{\text{sparse}}$, $V_{\text{uneven}}$, etc.), as by earlier simulation arguments ({see \Cref{obs:spse}}) this can be computed in $O(1)$ rounds in \LOCAL (and \MPC, for low enough degree). We may also assume that nodes have information about the value of any of the parameters in \Cref{def:parameters} for themselves and any of their neighbours ({see \Cref{lem:computing_parameters}}). Note that this is, in total, $O(\Delta_{G}^2)$ words of information for each node $v$: a constant number of words for each parameter, $O(\Delta_{G})$ words for set membership, multiplied by the number of neighbors of $v$, which is $O(\Delta_{G})$.

We look at each in turn:
\begin{itemize}
    \item \textbf{\sc TryRandomColor}: The procedure takes $O(1)$ rounds of \LOCAL. Nodes need no other words of input information. Nodes only use information from their neighbors, and each node uses $O(\log \Delta_{G})$ random bits to select a color from its palette. Note that the computation is $O(\Delta_{G})$. The output information is either a color with which $v$ has permanently colored itself, or {\sc Fail} if it does not color itself: this is clearly $O(\Delta_{G})$ words of information. The success property is that the slack of a node increases from $c \cdot d_{G}(v)$ for some constant $c$ to $2 \cdot d_{G}(v)$ \cite[Lemma~26]{hknt_local_d1lc}. This is computable in time linear in the degree of a node and based only on the output of the immediate neighbors of a node. The property succeeds for each node with probability $p = 1-\exp(-\Omega(s(v)))$, and by our precondition, $s(v)$ is at least polylogarithmic so this is with high probability. Nodes deferring does not affect the algorithm, since this creates at least as much slack as it would if all nodes participated.

    \item \textbf{\sc GenerateSlack}: This takes $O(1)$ rounds of \LOCAL and nodes need no other information at the beginning of the procedure. During the procedure, nodes only use information from their neighbors and $\widetilde{O}(\Delta_{G})$ random bits (to determine whether the node is sampled and if so, what color is attempted). The output of the procedure is either the color with which $v$ permanently colored itself, or {\sc Fail}. The success of the procedure is quite complicated: depending on the type of node, a different guarantee on the eventual slack is required. We note, however, that these guarantees all succeed with high probability and are computable using only information in the immediate neighborhood of $v$ \cite[Lemmas~10, 11, 13, 15, 17, 18]{hknt_local_d1lc}. Deferring nodes creates temporary slack and so does not hurt the procedure.

    \item \textbf{\sc PutAside}: The algorithm consists of $1$ round of \LOCAL, and nodes need no additional information. During the procedure, nodes only use information from their neighbours, and use $O(\polylog(n)) \in O(\Delta_{G})$ (by construction) random bits to independently sample nodes into $S_C$. The output of the procedure is one word (whether $v \in P_C$). A node succeeds if the put-aside set of the almost-clique containing it (say $C$) satisfies $\size{P_C} = \Omega(\ell^2)$ for some polylogarithmic $\ell$ \cite[Lemma~5]{hknt_local_d1lc}. This can be checked using the $2$-hop neighborhood of $v$ because the diameter of $C$ is at most $2$. The property succeeds with high probability. If we defer nodes then this can hurt the size of the put-aside per se, however this is not an issue: the fact that we are deferring nodes creates the necessary slack that the put-aside set would be creating anyway. We have to be careful however, to color the deferred nodes \emph{after} the put-aside set, otherwise we lose the property that the put-aside sets in different almost-cliques are independent.

    \item \textbf{\sc SynchColorTrial}: This procedure takes $1$ round of \LOCAL. Nodes need no additional information about their neighborhood beforehand. Nodes only use information from their immediate neighbors during the procedure (since leader $x_C$ neighbors all nodes in the set of inliers $I_C$), and in the worst case (i.e.~if the node in question is $x_C$), nodes need $\widetilde{O}(\Delta_{G})$ random bits to permute their palette. The output of the procedure for a node $v \in I_C$ for some $C$ is either a color in $\Psi_v$ or {\sc Fail}. The success property for the procedure is that, for the almost-clique $C$ that $v$ is in, the number of nodes that return {\sc Fail} is bounded by $O(t)$, where $t$ is some polylogarithmic value \cite[Lemma~7]{hknt_local_d1lc}. This happens with high probability ($1-\exp(-t)$). Nodes dropping out can only help the algorithm, as they are not counted as nodes which fail to be colored.
    
    \item \textbf{\sc SlackColor}:  There are three parts to the algorithm. First, {\sc TryRandomColor} is called in order to amplify the slack of each node (which is linear in the degree: a prerequisite of {\sc SlackColor}). Then two loops of {\sc MultiTrial} instances are run.\\
    The $O(1)$ calls to {\sc TryRandomColor} are normal $(O(1), \Delta_{G})$-distributed procedures by the item above.\\
    Each iteration of the first for-loop of executions of {\sc MultiTrial} takes $O(1)$ \LOCAL rounds. Nodes need no additional input information. During the execution of the loop, nodes only use information from their immediate neighbors, and only use $O(\Delta_{G})$ random bits to select a subset of their palette. The output of the procedure for $v$ is either a color in $\Psi_v$ or {\sc Fail}, which is one word. Nodes succeed (with high probability) if $d(v) \le \slack{v}/\min(2^{x_i},\rho^\kappa)$ \cite[Lemma~27]{hknt_local_d1lc}. This is, again, computable using only the output of the node and output information of the immediate neighborhood. Nodes deferring reduce the degree (temporarily), and so they cannot cause the property to be unsatisfied.\\
    The second for-loop of {\sc MultiTrial} instances are normal distributed procedures for the same reason: the success property is $d(v) \le \slack{v}/\min(\rho^{(i+1) \cdot \kappa},\rho)$, and also satisfies the requirements of \Cref{prng-derandomizable}. So, overall \textbf{\sc SlackColor} consists of a sequence of $O(\log^* \Delta)$ normal $(O(1), \Delta_G)$-round distributed procedures.\qedhere

\end{itemize}
\end{proof}
\section[Derandomization for Low-Degree]{Derandomization of Algorithm for Low-Degree Instances}
\label{sec:deterministic_d1lc}

In this section, we give a deterministic \MPC algorithm, for \DILC for a graph $G$ with maximum degree at most $n^{7\delta}$, that takes $O(\log \log \log n)$ rounds on an \MPC with $\lspace=O(n^\phi)$ and $O(n^{1+\phi})$ global space\footnote{The number of edges is always at most $O(n^{1+\phi})$.}. This is possible due to the simulation of deterministic steps in \cite{hknt_local_d1lc} when the maximum degree is at most $\sqrt{\lspace}$ as discussed in \Cref{sec:randomized-d1lc} and the fact that the randomized steps in \cite{hknt_local_d1lc} can be efficiently derandomized. In particular, we will discuss the derandomization of \textsc{TryRandomColor} (\Cref{alg:try_random_color}), \textsc{SlackColor} (\Cref{alg:slack_color}), {\sc GenerateSlack} (\Cref{alg:generate_slack}), \textsc{PutAside} (\Cref{alg:put-aside}) and \textsc{SynchColorTrial} (\Cref{alg:synch}) by proving that all of them are $(O(\log ^*n),\Delta)$-round distributed procedures and arguing that such procedures can be efficiently derandomized by using pseudorandom generators. We can show that, in one iteration with $O(\log^* n)$ rounds, the number of uncolored nodes will be reduced by a factor of $n^{\Omega(1)}$ (that are deferred to be colored in the later iterations). So, by repeating the process for $O(1)$ iterations, we have $O(\log n)$ nodes left to be colored. Note also that we are considering the algorithm of \Cref{sec:overview_of_hknt} here only when the degree of the nodes is at least $\log ^7 n$. So, we always defer the coloring of the nodes whose degree is at most $\log ^7 n$. So, at the end we have a graph (of at most $n$ nodes) having maximum degree $\log ^7 n$. Note that this can be colored in $O(\log \log \log n)$ rounds due to a result of \cite{CDP21}.

 In this section, the graph $G$ is often not clear from the context. So, we use $n_G$ to denote the number of nodes in $G$. For a node $v$, $N_G(v)$ denotes the set of neighbors of $v$ in $G$ and $d_G(v)$ denotes the degree of $v$ in $G$. We denote the maximum degree of any node in $G$ by $\Delta_G$. For a node $v$ and $c \in \mathbb{N}$, $N^c_G(v)$ denotes the nodes in the $c$-hop neighborhood of $v$.

\subsection{Normal distributed procedures}

First, we define the notion of $(c,\Delta_G)$-round randomized distributed procedure.

\begin{definition}\label{prng-derandomizable}
A \emph{normal $(c,\Delta_G)$-round distributed procedure} running on a graph $G$, of maximum degree \emph{at most} $\Delta_G$, is a procedure in the randomized \LOCAL model satisfying the following criteria:

\begin{itemize}
\item The procedure takes $c$ rounds of \LOCAL.
\item At the beginning of the procedure, nodes $v$ have $O(\Delta_G^c)$-word sets of input information $\textsc{In}_v$ associated with them.
\item During the procedure, nodes only use information from their $c$-hop neighborhood (i.e.\ from $\textsc{In}_v \cup \bigcup_{u\in N^c_G(v)} \textsc{In}_u$) and $O(\Delta_G^{2c})$ random bits, and perform $O(\Delta_G^{8c})$ computation.
\item The output of the procedure is a new $O(\Delta_G^c)$-word set of output information $\textsc{Out}_v$ for each node.
\item The procedure has a `success property' (computable with $O(\Delta_G^{8c})$ computation) that determines whether it has been successful for a particular node, based on the output information of that node and its $c$-hop neighborhood (formally, $\textsc{SP} : \textsc{Out}_v \cup \bigcup_{u\in N^c_G(v)} \textsc{Out}_u \rightarrow \{T,F\}$).
\item At the end of the procedure, then for any node $v$, $$\Prob{\textsc{SP}(\textsc{Out}_v \cup \bigcup_{u\in N^c_G(v)} \textsc{Out}_u ) = T} \ge 1-\frac{1}{2n_G}.$$
\item Each success property is such that `deferring' nodes to be dealt with later cannot cause the property to become unsatisfied. That is, if we denote by $\textsc{Defer}_w$ a special status indicating that $w$ is to be deferred (which is not to be conferred by the procedure itself), then adding $\textsc{Defer}_w$ to $w$'s output information should not cause any other nodes' success properties to become unsatisfied. Formally:

$$
 \textsc{SP}\left(\textsc{Out}_v \cup \bigcup_{u\in N^c_G(v)} \textsc{Out}_u \right) = T  
\Rightarrow \forall w\in N^c_G(v), \textsc{SP}\left(\textsc{Out}_v\cup \{\textsc{Defer}_w\} \cup \bigcup_{u\in N^c_G(v)} \textsc{Out}_u \right) = T\enspace.
$$

\end{itemize}
\end{definition}

In the following lemma, we show that a number of randomized subroutines of \cite{hknt_local_d1lc} are in fact normal $(O(1), \Delta_G)$-round distributed procedures. Under these circumstances, we can use a pseudorandom generator, combined with the method of conditional expectations, to derandomize the algorithm. The details of PRG and the derandomization  will be discussed in \Cref{sec:derand}.
\begin{lemma}
\label{lem:derandomizable-subroutines}
The subroutines
{\sc TryRandomColor} (\Cref{alg:try_random_color}),
{\sc GenerateSlack} (\Cref{alg:generate_slack}),
{\sc PutAside} (\Cref{alg:put-aside}),
and {\sc SynchColorTrial} (\Cref{alg:synch})
are all normal $(O(1), \Delta_{G})$-round distributed procedures. The subroutine {\sc SlackColor} (\Cref{alg:slack_color}) consists of a sequence of $O(\log^* \Delta)$ normal $(O(1), \Delta_G)$-round distributed procedures.
\end{lemma}

\begin{proof}
First, we note that at the start of any algorithm, nodes may be assumed to have information about which sets they are members of ($V_{\text{sparse}}$, $V_{\text{uneven}}$, etc.), as by earlier simulation arguments ({see \Cref{obs:spse}}) this can be computed in $O(1)$ rounds in \LOCAL (and \MPC, for low enough degree). We may also assume that nodes have information about the value of any of the parameters in \Cref{def:parameters} for themselves and any of their neighbours ({see \Cref{lem:computing_parameters}}). Note that this is, in total, $O(\Delta_{G}^2)$ words of information for each node $v$: a constant number of words for each parameter, $O(\Delta_{G})$ words for set membership, multiplied by the number of neighbors of $v$, which is $O(\Delta_{G})$.

We look at each in turn:
\begin{itemize}
    \item \textbf{\sc TryRandomColor}: The procedure takes $O(1)$ rounds of \LOCAL. Nodes need no other words of input information. Nodes only use information from their neighbors, and each node uses $O(\log \Delta_{G})$ random bits to select a color from its palette. Note that the computation is $O(\Delta_{G})$. The output information is either a color with which $v$ has permanently colored itself, or {\sc Fail} if it does not color itself: this is clearly $O(\Delta_{G})$ words of information. The success property is that the slack of a node increases from $c \cdot d_{G}(v)$ for some constant $c$ to $2 \cdot d_{G}(v)$ \cite[Lemma~26]{hknt_local_d1lc}. This is computable in time linear in the degree of a node and based only on the output of the immediate neighbors of a node. The property succeeds for each node with probability $p=1-\exp(-\Omega(s(v)))$, and by our precondition, $s(v)$ is at least polylogarithmic so this is with high probability. Nodes deferring does not affect the algorithm, since this creates at least as much slack as it would if all nodes participated.

    \item \textbf{\sc GenerateSlack}: This takes $O(1)$ rounds of \LOCAL and nodes need no other information at the beginning of the procedure. During the procedure, nodes only use information from their neighbors and $\widetilde{O}(\Delta_{G})$ random bits (to determine whether the node is sampled and if so, what color is attempted). The output of the procedure is either the color with which $v$ permanently colored itself, or {\sc Fail}. The success of the procedure is quite complicated: depending on the type of node, a different guarantee on the eventual slack is required. We note, however, that these guarantees all succeed with high probability and are computable using only information in the immediate neighborhood of $v$ \cite[Lemmas~10, 11, 13, 15, 17, 18]{hknt_local_d1lc}. Deferring nodes creates temporary slack and so does not hurt the procedure.

    \item \textbf{\sc PutAside}: The algorithm consists of $1$ round of \LOCAL, and nodes need no additional information. During the procedure, nodes only use information from their neighbours, and use $O(\polylog(n)) \in O(\Delta_{G})$ (by construction) random bits to independently sample nodes into $S_C$. The output of the procedure is one word (whether $v \in P_C$). A node succeeds if the put-aside set of the almost-clique containing it (say $C$) satisfies $\size{P_C} = \Omega(\ell^2)$ for some polylogarithmic $\ell$ \cite[Lemma~5]{hknt_local_d1lc}. This can be checked using the $2$-hop neighborhood of $v$ because the diameter of $C$ is at most $2$. The property succeeds with high probability. If we defer nodes then this can hurt the size of the put-aside per se, however this is not an issue: the fact that we are deferring nodes creates the necessary slack that the put-aside set would be creating anyway. We have to be careful however, to color the deferred nodes \emph{after} the put-aside set, otherwise we lose the property that the put-aside sets in different almost-cliques are independent.

    \item \textbf{\sc SynchColorTrial}: This procedure takes $1$ round of \LOCAL. Nodes need no additional information about their neighborhood beforehand. Nodes only use information from their immediate neighbors during the procedure (since leader $x_C$ neighbors all nodes in the set of inliers $I_C$), and in the worst case (i.e.~if the node in question is $x_C$), nodes need $\widetilde{O}(\Delta_{G})$ random bits to permute their palette. The output of the procedure for a node $v \in I_C$ for some $C$ is either a color in $\Psi_v$ or {\sc Fail}. The success property for the procedure is that, for the almost-clique $C$ that $v$ is in, the number of nodes that return {\sc Fail} is bounded by $O(t)$, where $t$ is some polylogarithmic value \cite[Lemma~7]{hknt_local_d1lc}. This happens with high probability ($1-\exp(-t)$). Nodes dropping out can only help the algorithm, as they are not counted as nodes which fail to be colored.
    
    \item \textbf{\sc SlackColor}:  There are three parts to the algorithm. First, {\sc TryRandomColor} is called in order to amplify the slack of each node (which is linear in the degree: a prerequisite of {\sc SlackColor}). Then two loops of {\sc MultiTrial} instances are run.\\
    The $O(1)$ calls to {\sc TryRandomColor} are normal $(O(1), \Delta_{G})$-distributed procedures by the item above.\\
    Each iteration of the first for-loop of executions of {\sc MultiTrial} takes $O(1)$ \LOCAL rounds. Nodes need no additional input information. During the execution of the loop, nodes only use information from their immediate neighbors, and only use $O(\Delta_{G})$ random bits to select a subset of their palette. The output of the procedure for $v$ is either a color in $\Psi_v$ or {\sc Fail}, which is one word. Nodes succeed (with high probability) if $d(v) \le \slack{v}/\min(2^{x_i},\rho^\kappa)$ \cite[Lemma~27]{hknt_local_d1lc}. This is, again, computable using only the output of the node and output information of the immediate neighborhood. Nodes deferring reduce the degree (temporarily), and so they cannot cause the property to be unsatisfied.\\
    The second for-loop of {\sc MultiTrial} instances are normal distributed procedures for the same reason: the success property is $d(v) \le \slack{v}/\min(\rho^{(i+1) \cdot \kappa},\rho)$, and also satisfies the requirements of \Cref{prng-derandomizable}. So, overall \textbf{\sc SlackColor} consists of a sequence of $O(\log^* \Delta)$ normal $(O(1), \Delta_G)$-round distributed procedures.\qedhere

\end{itemize}
\end{proof}


\subsection{PRGs and derandomization}\label{sec:derand}

A \emph{Pseudorandom Generator (PRG)} is a function that takes a short \emph{random seed} and produces a longer string of \emph{pseudorandom} bits, which are computationally indistinguishable from truly random bits. We use the following definition from \cite{Vadhan12} for indistinguishability:

\begin{definition}[Definition 7.1 in \cite{Vadhan12}]
\label{def:computational-indistinguishability}
Random variables $X$ and $Y$ taking values in $\{0,1\}^m$ are $(t,\eps)$ \textbf{indistinguishable} if for every non-uniform algorithm $T:\{0,1\}^m\rightarrow \{0,1\}$ running in time at most $t$, we have $|\Pr[T(X)=1]-\Pr[T(Y)=1]| \le \eps$.
\end{definition}

Let $U_k$ denote a random variable generated uniformly at random from $\{0,1\}^k$. Then pseudorandom generators are defined as follows:

\begin{definition}[PRG, Definition 7.3 in \cite{Vadhan12}]
\label{def:PRG}
A deterministic function $\mathcal{G}:\{0,1\}^d \to \{0,1\}^m$ is a \textbf{$(t,\eps)$ pseudorandom generator (PRG)} if (1) $d < m$, and (2) $\mathcal{G}(U_d)$ and $U_m$ are $(t,\eps)$ indistinguishable.
\end{definition}

A simple application of the probabilistic method can show the existence of PRGs with optimal parameters:

\begin{proposition}[Proposition 7.8 in \cite{Vadhan12}]
\label{prop:perfect-PRG}
For all $t \in \mathbb{N}$ and $\eps>0$, there exists a (non-explicit) $(t,\eps)$ pseudorandom generator $\mathcal{G}: \{0,1\}^d \to \{0,1\}^t$ with seed length $d = \Theta(\log t+\log(1/\eps))$.
\end{proposition}

As shown in \cite{CDPcompstab}, such a PRG can be computed using relatively low space (but exponential computation).

\begin{lemma}[Lemma 35 of \cite{CDPcompstab}, arXiv version]
\label{lem:prg-alg}
For all $t \in \mathbb{N}$ and $\eps>0$, there exists an algorithm for computing the $(t,\eps)$ PRG of \Cref{prop:perfect-PRG} in time $\exp(poly(t/\eps))$ and space $poly(t/\eps)$.
\end{lemma}

For any integer $k\in \mathbb{N}$, let $G^k$ be the $k$-power of $G$, that is, $G^k$ is the graph having the same node set as $G$ and any pair of nodes at a distance at most $k$ in $G$ form an edge in $G^k$.

\begin{lemma}\label{lem:phasederand}
There is a constant $C$ such that, given an $O(\Delta_G^{8c})$-coloring of $G^{4c}$, any normal $(c,\Delta_G)$-round distributed procedure on a graph $G$ can be derandomized in $O(1)$ rounds of \MPC, using $\lspace=O(\Delta_G^{cC})$ space per machine and $O(n_G \Delta_G^{cC})$ global space, with the following properties:

\begin{itemize}
\item At most $\frac 12+n_G \Delta_G^{-11c}$ nodes are deferred.
\item All non-deferred nodes $v$ satisfy the success property.
\end{itemize}
\end{lemma}

\begin{proof}
First, for each node $v$ we collect the input information of its $8c$-hop neighborhood ($\textsc{In}_v \cup \bigcup_{u\in N^c_G(v)} \textsc{In}_u$) to a dedicated machine. This takes $c$ rounds, and requires $O(\Delta_G^{8c} \cdot \Delta_G^c) = O(\Delta_G^{11c})$ space per machine and $O(n_G \Delta_G^{11c})$ global space. Our aim is then to simulate the procedure using randomness produced by the $(\Delta_G^{11c}, \Delta_G^{-11c})$ PRG implied by \Cref{prop:perfect-PRG}. This PRG has seed length $d = \Theta(\log \Delta_G)$ and requires $poly(\Delta_G^{11c})$ space to construct and store. We choose $C$ so that this $poly(\Delta_G^{11c})$ term is $O(\Delta_G^{cC})$.

The PRG, when evaluated on a seed, produces a string of $\Delta_G^{11c}$ pseudorandom bits. We use the provided $O(\Delta_G^{8c})$-coloring of $G^{4c}$ to split this string into the input randomness for each node. By definition of a normal $(c,\Delta_G)$-round distributed procedure, each node requires $O(\Delta_G^{2c})$ random bits, and we provide a node colored $i$ in the $O(\Delta_G^{8c})$-coloring with the $i^{th}$ chunk of $O(\Delta_G^{2c})$ bits from the PRG's output. This means that any pair of nodes within distance $4c$ receive disjoint chunks of pseudorandom bits.

The output of the PRG under a random seed is $(\Delta_G^{11c},\Delta_G^{-11c})$ indistinguishable from a uniform distribution. Consider the process of simulating the procedure for all nodes within distance $c$ of a node $v$, and then evaluating the success property $\textsc{SP}(\textsc{Out}_v \cup \bigcup_{u\in N^c_G(v)} \textsc{Out}_u ) $. By definition of a normal $(c,\Delta_G)$-round distrib\-ut\-ed procedure, this combined process requires $O(\Delta_G^{9c})$ computation, and depends on the input information and randomness of nodes up to distance $2c$ from $v$ (and note that all nodes within this radius receive different chunks of the PRG's output as their pseudorandom bits). This combined process can therefore be run on one \MPC machine. Furthermore, it can be considered a non-uniform algorithm using at most $\Delta_G^{9c}$ computation, and so is `fooled' by the PRG. This means that the output ($T$ or $F$, indicating whether the success property is satisfied) at $v$ differs with probability at most $\Delta_G^{-11c}$ from what it would be under full randomness. That is, the output will be $F$ with probability at most $\frac{1}{2n_G}+ \Delta_G^{-11c}$.

The expected number of nodes who do not satisfy the success property, when simulating the procedure using the PRG with a random seed, is therefore at most $\frac 12+n_G \Delta_G^{-11c}$. Since this value is an aggregate of functions computable by individual machines, using the method of conditional expectations (as implemented for low-space MPC in \cite{CDPsparse,CDP20}) we can deterministically select a seed for the PRG for which the number of nodes which do not satisfy the success property is at most its expectation (i.e. at most $\frac 12+n_G \Delta_G^{-11c}$), in $O(1)$ rounds.

Then, we simply mark the nodes which do not satisfy the success property as deferred. In doing so, we meet the conditions of the lemma.
\end{proof}

\begin{lemma}\label{lem:fullderand}
There is a constant $C$ such that, given an $O(\Delta_G^{8c})$-coloring of $G^{4c}$, any normal $(c,\Delta_G)$-round distributed procedure on a graph $G$ with $\Delta_G \le n^{7\delta}$ can be derandomized in $O(1)$ rounds of \MPC, using $\lspace=O( n^{7\delta cC})$ space per machine and global space $O(n_G \cdot n^{7\delta cC})$, with all nodes satisfying the success condition of the procedure.
\end{lemma}

\begin{proof}
We repeatedly apply \Cref{lem:phasederand} to the graph induced by the deferred nodes, in each case using $\Delta_G = n^{7\delta}$ as an upper bound on the maximum degree (even though the actual maximum degree may be significantly lower). After $r$ iterations of this, the number of remaining deferred nodes is at most $\frac 34 + n\cdot n^{-11cr\cdot7\delta}$. Taking $r=1/\delta$ (which is $O(1)$), we then have fewer than $1$ remaining deferred node, i.e.~all nodes satisfy the success condition.
\end{proof}

\begin{lemma}\label{lem:phasecolor}
The algorithm of \cite{hknt_local_d1lc} for \DILC for nodes of degree at least $\log^7 n$, running on a graph $G$, of maximum degree \emph{at most} $n^{7\delta}$, can be derandomized in $O(\log^* n)$ rounds. The process uses $O(n^{7\delta cC})$ space per machine and $O(n_G \cdot n^{7\delta cC})$ global space, where $C$ is as in \Cref{lem:phasederand}.
\end{lemma}

\begin{proof}
We use $\Delta_G = n^{7\delta}$ as an upper bound on the maximum degree of $G$, and first compute the $O(\Delta_G^{8c})$-coloring of $G^{4c}$ required for \Cref{lem:fullderand}. This can be done in $O(\log^* n)$ rounds by simulating Linial's \cite{Linial92} deterministic \LOCAL algorithm round-by-round. Then, since we showed in \Cref{lem:derandomizable-subroutines} that the algorithm of \cite{hknt_local_d1lc} can be expressed as a sequence of $O(\log^* n)$ \emph{phases}, each of which is a normal $(c, \Delta_G )$-round distributed procedure for some constant $c$, we can apply \Cref{lem:fullderand} to derandomize each phase in turn, resulting in a deterministic coloring of all nodes.
\end{proof}

Since the algorithm of \cite{hknt_local_d1lc} is only for nodes of degree at least $\log^7 n$, note that our repeated derandomization steps can leave a set $D$ of remaining uncolored nodes with degrees of at most $\log^7 n$.

\subsection{\DILC when the maximum degree is $n^{7\delta}$}

The remaining algorithm is simple: we apply \Cref{lem:phasecolor} to deterministically color all nodes of degree at least $\log^7 n$ (with some nodes potentially remaining uncolored once their uncolored degree drops below $\log^7 n$), and then apply the following lemma to color the remaining low-degree nodes:
\begin{lemma}[Lemma 14 of \cite{CDP21}]\label{lem:lowdeg}
For any $n$-node graph $G$ with maximum degree $\Delta = \log^{O(1)} n$, there exists an $O(\log \log \log n)$-round deterministic algorithm for
computing \DILC, using $O(n^\phi)$ space per machine and $O(n^{1+\phi})$ global space, for any positive constant~$\phi \in (0,1)$.
\end{lemma}

The algorithm for \DILC when the degree is $O(n^{7\delta})$ is described in \Cref{alg:DLDC} and its correctness is proved in \Cref{thm:midcolor}.

\begin{algorithm}[H]
	\caption{\textsc{DerandomizedMidDegreeColor}$(G)$}
	\label{alg:DLDC}
	
		    Let $G_{\text{low}}$ consist of all nodes of $G$ with palette size at most $\log^7 n$.
		
		    Derandomize coloring of $G\setminus G_{\text{low}}$ as in \Cref{lem:phasecolor}, leaving a set $D$ of uncolored nodes with low degree.
		
		    
		
		    $G_{\text{low}}\gets G_{\text{low}} \cup D$.
	Update palettes of $G_{\text{low}}$, then color using \Cref{lem:lowdeg}.

\end{algorithm}

\begin{theorem}\label{thm:midcolor}
\textsc{DerandomizedMidDegreeColor}, performed on a \DILC instance $G$ of degree at most $n^{7\delta}$, properly colors all nodes in $O(\log \log\log n)$ rounds of \MPC, using $O(n^{7\delta cC})$ space per machine and $O(n_{G} \cdot n^{7\delta cC})$ global space, where $C$ is as in \Cref{lem:phasederand}.
\end{theorem}

\begin{proof}
After applying \Cref{lem:phasecolor} the degree of all uncolored nodes in $G$ must also be at most $\log^7 n$, so all remaining nodes are colored by \Cref{lem:lowdeg}.

The space usage is dominated by that of \Cref{lem:phasecolor}, and the number of rounds required is $O(\log\log\log n)$ since it is dominated by the $O(\log\log\log n)$ rounds for \Cref{lem:lowdeg}.
\end{proof}

\section{Deterministic D1LC Algorithm}

To reduce our \DILC instances to a collection of instances with lower degree, in order to fit constant-radius balls onto machines, we employ a recursive structure \textsc{LowSpaceColorReduce} (\Cref{alg:LSColorReduce}) similar to \cite{CDP20,CDP21}.

\begin{algorithm}[H]
	\caption{\textsc{LowSpaceColorReduce}$(G)$}
	\label{alg:LSColorReduce}
		$G_{\text{mid}},G_1 \dots, G_{n^{\delta}} \gets \textsc{LowSpacePartition}(G)$.
		
	    For each $i = 1, \dots,n^{\delta}-1$ in parallel: call \textsc{LowSpaceColorReduce}$(G_i)$.
	
		Update color palettes of $G_{n^{\delta}}$, call \textsc{LowSpaceColorReduce}$(G_{n^{\delta}})$.
		
		Update color palettes of $G_{\text{mid}}$.
		
		Color $G_{\text{mid}}$ using \textsc{DerandomizedMidDegreeColor}$(G_{\text{mid}})$.
\end{algorithm}

The recursive structure in the above algorithm relies on a partitioning procedure to divide the nodes and colors in the input instance into \emph{bins}:

\begin{algorithm}[H]
	\caption{\textsc{LowSpacePartition}$(G)$}
	\label{alg:LSPartition}
		Let $G_{\text{mid}}$ be the graph induced by the set of nodes $v$ with $d(v)\le n^{7\delta}$.
		
		Let hash function $h_1:[n]\rightarrow [n^{\delta}]$ map each node $v$ to a bin $h_1(v) \in [n^{\delta}]$.
		
		Let hash function $h_2:[n^2]\rightarrow [n^{\delta}-1]$ map colors $\gamma$ to a bin $h_2(\gamma) \in [n^{\delta}-1]$.
		
		Let $G_1,\dots,G_{n^{\delta}}$ be the graphs induced by bins $1,\dots,n^{\delta}$ respectively, minus the nodes in $G_{\text{mid}}$.
		
		Restrict palettes of nodes in $G_1,\dots,G_{n^{\delta}-1}$ to colors assigned by $h_2$ to corresponding bins.
    	
    	Return $G_{\text{mid}},G_1,\dots,G_{n^{\delta}}$.
\end{algorithm}





We can follow the analysis of \cite{CDP20} to analyze the partitioning process, since it is only the base cases that have changed. The following lemma provides the important properties of the partitioning:

\begin{lemma}[Lemma 4.6 of \cite{CDP20}]\label{lem:LSdeterministic-hashing}
Assume that, at the beginning of a call to \textsc{LowSpacePartition}, we have $d(v)<p(v)$ for all nodes $v$. Then, in $O(1)$ \MPC rounds with $O(n^{7\delta})$ local space per machine and $O(n+m)$ global space (over all parallel instances), one can deterministically select hash functions $h_1$, $h_2$ such that after the call,
	%
	\begin{itemize}
		\item for any node $v\notin G_{\text{mid}}$, $d'(v) < 2d(v)n^{-\delta}$, and
		\item for any node $v$, $d'(v)<p'(v)$.
	\end{itemize}
 Here $d'(v)$ denotes the degree of $v$ in the subgraph induced by the nodes present in the same bucket as $v$ and $p'(v)$ denotes the number of $v$'s palette colors that are in the same bucket as $v$.
\end{lemma}

Now we are ready to prove our main result that $(\deg+1)$-list coloring can be performed deterministically in $O(\log \log\log n)$ rounds of \MPC, with $O(n^\spacexp)$ local space and $O(m+n^{1+\spacexp})$ global space.

\begin{proof}[Proof of \Cref{thm:main:deter}]
As in \cite{CDP21}, calling \textsc{LowSpaceColorReduce} on our input graph creates a recursion tree of $O(1)$ depth (since each recursive call reduces the maximum degree by a $n^{-\delta}$ factor). It therefore creates $O(1)$ sequential sets of base-case instances to solve concurrently, which in \Cref{alg:LSColorReduce} are solved by \textsc{DerandomizedMidDegreeColor}. Furthermore, each set of concurrent instances has at most $n$ nodes in total, since all nodes are only partitioned into one instance, and each instance has maximum degree $n^{7\delta}$.

By \Cref{thm:midcolor}, each such instance $G$ is colored in $O(\log\log\log n)$ rounds using $O(n^{7\delta cC})$ space per machine and $O(n \cdot n^{7\delta cC})$ global space. The global space used by all concurrent instances is therefore $O(n \cdot n^{7\delta cC})$. Setting $\delta$ sufficiently smaller than the space parameter \spacexp, this is $O(n^\spacexp)$ space per machine and $O(n^{1+\spacexp})$ global space. Since receiving the input and the first call to \textsc{LowSpacePartition} also requires $O(m)$ global space, the overall space bound is $O(m+n^{1+\spacexp})$.
\end{proof}



\section{Proofs from Section~\ref{sec:randomized-d1lc}}
\label{sec:proofs-from-sec-3}
In this section we provide some proofs of the claims made in \Cref{sec:randomized-d1lc}. These lemmas pertain to the simulation of deterministic subroutines from Fischer \etal, such as computing certain parameters for each node, and computing a set of nodes $V_\text{start}$ for which it is not easy to generate slack using color trials.

We begin with the proof of \Cref{lem:computing_parameters}, which showed that parameters such as slack, sparsity, and disparity can be computed in sublinear \MPC in $O(1)$ rounds using $O(m+n^{1+\phi})$ global space.

\begin{proof}[Proof of \Cref{lem:computing_parameters}]
We extensively use the subroutines in \Cref{lem:mpc_subroutines_low_degree}. We deal with the parameters in the order they appear in \Cref{def:parameters}.

\begin{itemize}
\item \textbf{Slack:} In $O(1)$ rounds, we can sort tuples corresponding to edges incident to $v$ and colors in $\Psi(v)$ such that they appear on consecutive machines: we can then count how many of each there are and compute \slack{v}. 
\item \textbf{Sparsity:} Since we can  compute $d(v)$ for all nodes in constant rounds (machines can collect all edges incident to $v$ and count them), to compute $\sparsity{v}$ it remains to count the edges between neighbors of $v$. By the second subroutine in \Cref{lem:mpc_subroutines_low_degree} we can collect the $2$-hop neighborhood of $v$ on a single machine. We can then remove duplicates if necessary and count the number of edges, and then calculate \sparsity{v}.
\item \textbf{Disparity:} By the first subroutine of \Cref{lem:mpc_subroutines_low_degree} we can collect the palette of each of the neighbors of $v$ in constant rounds, allowing us to calculate \disparity{u}{v} for each neighbor $u$ of~$v$.
\item \textbf{Discrepancy:} Follows immediately from the ability to collect \disparity{u}{v} for all neighbors $u$ of $v$.
\item \textbf{Unevenness:} It suffices to collect $d(u)$ from each neighbor $u$ of $v$, which can be done using the first subroutine of \Cref{lem:mpc_subroutines_low_degree}.
\item \textbf{Slackability / Strong Slackability:} Note that these are additions of already computed parameters, and hence can be computed in constant rounds.
\end{itemize}
Mainly due to Lemma~\ref{lem:mpc_subroutines_low_degree} and from the above description, it is easy to see that each of the above parameters can be computed in $O(1)$ rounds for all the nodes in parallel. For the global space complexity for computing the above parameters, note that each node $v$ collects its $2$-neighborhood, which is of size $O(\lspace)$ as the maximum degree of any node is $O(\sqrt{\lspace})$.
\end{proof}



We next consider the identification of $V_{\text{start}}$, the subset of $V_\text{sparse} \cup V_\text{uneven}$ for which it is hard to generate slack. To identify this set, we consider a breakdown of $V_\text{sparse} \cup V_\text{uneven}$ into several sets based on the parameters defined in \Cref{def:parameters}.
We first explain the notion of a ``heavy'' color. A heavy color $c$ with respect to a node $v$ is a color such that if all of the neighbors of $v$ were to pick a color from their palettes uniformly at random, the expected number of neighbors of $v$ which would pick $c$ (which we denote $H(c)$) is at least some suitable constant. We denote by $\mathcal{C}_v^{\text{heavy}}$ the set of heavy colors with respect to $v$. Now $V_{\text{start}}$ is defined as follows (taken from \cite{hknt_local_d1lc}; the $\veps_i$ are all constants):


\begin{align*}
    V_{\text{balanced}} &= \{v \in V_{\text{sparse}} : |\{u \in N(v) : d(u) > 2d(v)/3\}| \geq \veps_1 d(v)\}. \\
    V_{\text{disc}} &= \{v \in V_{\text{sparse}} : \discrepancy{v} \geq \veps_2 d(v)\}. \\
    V_{\text{easy}} &=
    \begin{aligned}[t]
        & V_{\text{balanced}} \cup V_{\text{disc}} \cup V_{\text{uneven}}\\
        & \cup \{v \in V_{\text{sparse}} : | N(v) \cap V_{\text{dense}} | \geq \veps_3 d(v) \}.
    \end{aligned}\\
    V_{\text{heavy}} &= \{v \in V_{\text{sparse}} \setminus V_{\text{easy}} : \textstyle \sum_{c \in \mathcal{C}_v^{\text{heavy}}} H(c) \geq \veps_4 d(v)\}. \\
    V_{\text{start}} &= \{v \in V_{\text{sparse}} \setminus (V_{\text{easy}} \cup V_{\text{heavy}}) : | N(v) \cap V_{\text{easy}} | \geq \veps_5 d(v)\}.
\end{align*}

We can now prove \Cref{lem:identifying_vstart}:
\begin{proof}[Proof of \Cref{lem:identifying_vstart}]
Observe that $V_{\text{balanced}}$, $V_{\text{disc}}$, and $V_{\text{easy}}$ are computable in $O(1)$ rounds by applications of \Cref{lem:computing_parameters} and \Cref{lem:mpc_subroutines_low_degree}. We briefly note that, again using an application of the first subroutine of \Cref{lem:mpc_subroutines_low_degree}, nodes can be made aware of which neighbors are in which sets in $O(1)$ rounds.

It remains to compute which colors are heavy (and the set $V_{\text{heavy}}$): by the first subroutine of \Cref{lem:mpc_subroutines_low_degree} we can gather the palettes of all neighbors of $v$ on $M(v)$, and then $M(v)$ can compute this information in $O(1)$ rounds.
\end{proof}

We conclude with the proof that we can identify leaders, inliers, and outliers of almost-cliques, and establish which almost-cliques require put-aside sets, in $O(1)$ rounds.

\begin{proof}[Proof of \Cref{lem:finding_leaders}]

First, we explain how we can find the \emph{leader} $x_{C}$ and the \emph{outliers} $O_{C}$ for each almost-clique $C \in \cC$. The leader $x_{C}$ is the node in $C$ with minimum slackability. As we can determine the slackability of all the nodes (in $C$) by using $O(1)$ rounds in parallel (see \Cref{lem:computing_parameters}), we can determine $x_{C}$ for each almost-clique $C \in \cC$ in parallel in $O(1)$ rounds. The slackability of an almost-clique $C$ is defined as the slackability of its leader $x_C$. $C$ is said to be a \emph{low slack} almost-clique if its slackability is at most $\ell=\log^{2.1} \Delta$. Recall that these are the almost-cliques (with low slackability) for which we need to find put-aside sets.

Let $O_C$ be the set of \emph{outliers} in an almost-clique $C$. $O_C$ is comprised of the union of the $\frac{\max\{{d(x_C), \size{C}}\}}{3}$ nodes with the fewest common neighbors with $x_C$, the $\frac{\size{C}}{6}$ nodes of largest degree, and the nodes in $C$ that are not neighbors of $x_C$. The nodes in $C$ that are not outliers are called \emph{inliers} of $C$ (denoted by $I_C$). As each almost-clique $C$ is stored in one machine and we can determine the degrees of the nodes in $O(1)$ rounds, we can clearly find $O_C$ and $I_C$ for all almost-cliques in $O(1)$ rounds. 

\end{proof}

\section{D1LC in Sublinear \MPC with Linear Global Space}
\section{Simulating Deterministic \LOCAL Subroutines in Sublinear \MPC}
\section{Simulating \LOCAL Subroutines}
\label{sec:randomized-d1lc}

First, we provide some arguments that the subroutines in \cite{hknt_local_d1lc} which are already deterministic can be simulated efficiently in the \MPC model, provided that the maximum degree of the \DILC instance is below $\sqrt{\lspace}$.
In particular we show that, deterministically in $O(1)$ rounds, when the maximum degree is less than $\sqrt{\lspace}$, we can compute: all the parameters defined in \Cref{def:parameters}, a $(\deg+1)$-ACD (see \Cref{def:acd}), the set $V_{\text{start}}\subseteq V_{\text{sparse}}$~\footnote{A full definition of $V_{\text{start}}$ will be discussed later in this section.} (whose purpose was described in Section~\ref{sec:sparse}), the leaders, inliers, and outliers from each almost-clique, and the put-aside sets.

We begin with a useful lemma that says (in essence) that nodes can send and receive $O(\sqrt{\lspace})$ messages to each of their neighbours in one round\footnote{We remark that the \MPC model is not node-centric, but machine-centric, and that when we talk about node $u$ ``sending messages'' to node $v$, we suppose that each node has a machine responsible for it, and mean that the machine responsible for node $u$ is sending a message to the machine responsible for node $v$.}, provided that their degree is $O(\sqrt{{\lspace}})$.


\begin{lemma}
\label{lem:mpc_subroutines_low_degree}
Let $G=(V, E)$ be a graph such that for all $v \in V$, $d(v) \leq O(\sqrt{\lspace})$.

Suppose each node $v$ is assigned some unique machine $M(v)$: informally, $M(v)$ is responsible for $v$. Then, the following subroutines can be performed in parallel for each $v$ (and its assigned machine $M(v)$) in $O(1)$ rounds in \MPC with $\lspace = O(n^\spacexp)$, and global space $O(m+n^{1+\spacexp})$:

\begin{itemize}
    \item $M(v)$ can send $d(v)$ messages to each machine $M(u)$ that is responsible for a node $u \in N(v)$;
    \item $M(v)$ can collect all of the edges between neighbours of $v$.
\end{itemize}
\end{lemma}
\begin{proof}
Suppose each node is allocated a machine (giving global space of $O(m+n^{1+\phi})$). The machine responsible for node $v$ can prepare $d(v)$ messages for each of the neighbors of $v$. This takes $\leq \lspace$ local space, so these messages can be sent in $O(1)$ rounds. The second subroutine follows from the first: the $d(v)$ words that each node broadcasts consist of the list of its neighbors.
\end{proof}


We can now argue that provided the maximum degree of our graph is low enough, the parameters described in \Cref{def:parameters} can be efficiently computed in parallel for each node.

\begin{lemma}[Computing Parameters of \Cref{def:parameters}]
\label{lem:computing_parameters}
  For a graph $G=(V, E)$ with maximum degree {${\Delta \leq O(\sqrt{\lspace})}$}, all of the parameters listed in \Cref{def:parameters} can be computed for every node in parallel in \MPC with $\lspace = O(n^\spacexp)$ and global space $O(m+n^{1+\spacexp})$ in $O(1)$ rounds.
\end{lemma}
\begin{proof}
We extensively use the subroutines in \Cref{lem:mpc_subroutines_low_degree}. We deal with the parameters in the order they appear in \Cref{def:parameters}.

\begin{itemize}
\item \textbf{Slack:} In $O(1)$ rounds, we can sort tuples corresponding to edges incident to $v$ and colors in $\Psi(v)$ such that they appear on consecutive machines: we can then count how many of each there are and compute \slack{v}. 
\item \textbf{Sparsity:} Since we can  compute $d(v)$ for all nodes in constant rounds (machines can collect all edges incident to $v$ and count them), to compute $\sparsity{v}$ it remains to count the edges between neighbors of $v$. By the second subroutine in \Cref{lem:mpc_subroutines_low_degree} we can collect the $2$-hop neighborhood of $v$ on a single machine. We can then remove duplicates if necessary and count the number of edges, and then calculate \sparsity{v}.
\item \textbf{Disparity:} By the first subroutine of \Cref{lem:mpc_subroutines_low_degree} we can collect the palette of each of the neighbors of $v$ in constant rounds, allowing us to calculate \disparity{u}{v} for each neighbor $u$ of~$v$.
\item \textbf{Discrepancy:} Follows immediately from the ability to collect \disparity{u}{v} for all neighbors $u$ of $v$.
\item \textbf{Unevenness:} It suffices to collect $d(u)$ from each neighbor $u$ of $v$, which can be done using the first subroutine of \Cref{lem:mpc_subroutines_low_degree}.
\item \textbf{Slackability / Strong Slackability:} Note that these are additions of already computed parameters, and hence can be computed in constant rounds.
\end{itemize}
Mainly due to Lemma~\ref{lem:mpc_subroutines_low_degree} and from the above description, it is easy to see that each of the above parameters can be computed in $O(1)$ rounds for all the nodes in parallel. For the global space complexity for computing the above parameters, note that each node $v$ collects its $2$-neighborhood, which is of size $O(\lspace)$ as the maximum degree of any node is $O(\sqrt{\lspace})$.
\end{proof}

Next, we show how to compute an $(\deg +1)$-ACD in $O(1)$ rounds.

\begin{lemma}[Computing $(\deg+1)$-ACD]
\label{lem:computing_acd}
For a graph $G=(V, E)$ with maximum degree $\Delta \leq O(\sqrt{\lspace})$, an $(\deg+1)$-ACD can be computed in \MPC with $\lspace = O(n^\spacexp)$ and global space $O(m+n^{1+\phi})$ in $O(1)$ rounds.
\end{lemma}
\begin{proof}
First we argue that the diameter of the subgraph induced by any almost-clique $C$, is at most $2$. This is the case because, by (iv) of \Cref{def:acd}, the number of neighbors any node $v \in C$ has in $C$ is $\size{N(v) \cap C} \geq \frac{\size{C}} {1+ \veps_{ac}} > \frac{\size{C}}{2}$. Hence, any two nodes $u$ and $v$ in $C$ have a common neighbor in $C$.

By \Cref{lem:computing_parameters}, all nodes ($v$) can compute their sparsity ($\zeta_v$) and unevenness ($\eta_v$) in $O(1)$ rounds in parallel. From the values of $\zeta_v$ and $\eta_v$, we can decide whether $v \in V_{\text{sparse}}$, or $v \in V_{\text{uneven}}$, or neither, giving the following:
\begin{observation}\label{obs:spse}
{$V_{\text{sparse}}$ and $V_{\text{uneven}}$ can be determined in $O(1)$ rounds.}
\end{observation}

Note that each node $v \in (V \setminus (V_{\text{sparse}} \sqcup V_{\text{uneven}}))$ is present in some almost-clique $C(v)$. As the diameter of $G[C(v)]$ is at most 2, $C(v)$ is a subset of the $2$-hop neighborhood of $v$. As $d(v)\leq O(\sqrt{\lspace})$, by \Cref{lem:mpc_subroutines_low_degree} the $2$-hop neighborhood of $v$ can be found in $O(1)$ rounds and stored on a single machine. Once this is done, $M(v)$ can determine which almost-clique $v$ is in. Putting everything together, the ACD can be computed in $O(1)$ rounds and the global space used is $O(m+n^{1+\phi})$.
\end{proof}

Finally, we argue that we can identify a set of nodes $V_\text{start} \subseteq V_\text{sparse} \sqcup V_\text{uneven}$ for which it is difficult to generate slack. We give a more detailed characterisation of $V_\text{start}$ shortly, and prove \Cref{lem:identifying_vstart}  that says that $V_\text{start}$ can be identified in $O(1)$ rounds. We then argue (\Cref{lem:finding_leaders}) that we can identify the leaders, inliers, and outliers of all almost-cliques, and identify the almost-cliques for which we need to create a put-aside set in $O(1)$ rounds.

We now consider the identification of $V_{\text{start}}$, the subset of $V_\text{sparse} \cup V_\text{uneven}$ for which it is hard to generate slack. To identify this set, we consider a breakdown of $V_\text{sparse} \cup V_\text{uneven}$ into several sets based on the parameters defined in \Cref{def:parameters}.
We first explain the notion of a ``heavy'' color. A heavy color $c$ with respect to a node $v$ is a color such that if all of the neighbors of $v$ were to pick a color from their palettes uniformly at random, the expected number of neighbors of $v$ which would pick $c$ (which we denote $H(c)$) is at least some suitable constant. We denote by $\mathcal{C}_v^{\text{heavy}}$ the set of heavy colors with respect to $v$. Now $V_{\text{start}}$ is defined as follows (taken from \cite{hknt_local_d1lc}; the $\veps_i$ are all constants):


\begin{align*}
    V_{\text{balanced}} &= \{v \in V_{\text{sparse}} : |\{u \in N(v) : d(u) > 2d(v)/3\}| \geq \veps_1 d(v)\}. \\
    V_{\text{disc}} &= \{v \in V_{\text{sparse}} : \discrepancy{v} \geq \veps_2 d(v)\}. \\
    V_{\text{easy}} &=
    \begin{aligned}[t]
        & V_{\text{balanced}} \cup V_{\text{disc}} \cup V_{\text{uneven}}\\
        & \cup \{v \in V_{\text{sparse}} : | N(v) \cap V_{\text{dense}} | \geq \veps_3 d(v) \}.
    \end{aligned}\\
    V_{\text{heavy}} &= \{v \in V_{\text{sparse}} \setminus V_{\text{easy}} : \textstyle \sum_{c \in \mathcal{C}_v^{\text{heavy}}} H(c) \geq \veps_4 d(v)\}. \\
    V_{\text{start}} &= \{v \in V_{\text{sparse}} \setminus (V_{\text{easy}} \cup V_{\text{heavy}}) : | N(v) \cap V_{\text{easy}} | \geq \veps_5 d(v)\}.
\end{align*}


\begin{lemma}[Identifying $V_{\text{start}}$]
\label{lem:identifying_vstart}
Given a graph $G=(V, E)$ with maximum degree $\Delta \leq \sqrt{\lspace}$, the set $V_{\text{start}}$ can be identified on an \MPC with $\lspace = O(n^\spacexp)$ and global space $O(m+n^{1 + \phi})$ in $O(1)$ rounds.
\end{lemma}
\begin{proof}
Observe that $V_{\text{balanced}}$, $V_{\text{disc}}$, and $V_{\text{easy}}$ are computable in $O(1)$ rounds by applications of \Cref{lem:computing_parameters} and \Cref{lem:mpc_subroutines_low_degree}. We briefly note that, again using an application of the first subroutine of \Cref{lem:mpc_subroutines_low_degree}, nodes can be made aware of which neighbors are in which sets in $O(1)$ rounds.

It remains to compute which colors are heavy (and the set $V_{\text{heavy}}$): by the first subroutine of \Cref{lem:mpc_subroutines_low_degree} we can gather the palettes of all neighbors of $v$ on $M(v)$, and then $M(v)$ can compute this information in $O(1)$ rounds.
\end{proof}

In the following lemma, we show that we can identify leaders, inliers, and outliers of almost-cliques, and establish which almost-cliques require put-aside sets, in $O(1)$ rounds.

\begin{lemma}[Finding leaders, outliers and inliers]
\label{lem:finding_leaders}
Given a graph $G=(V, E)$ with maximum degree $\Delta \leq \sqrt{\lspace}$, we can find the leader, the set of inliers and the set of outliers for each almost-clique in \MPC with $\lspace = O(n^\spacexp)$ and global space $O(m+n^{1 + \phi})$ in $O(1)$ rounds. Moreover, we can detect all the almost-cliques for which we need to find put-aside sets.
\end{lemma}
\begin{proof}

First, we explain how we can find the \emph{leader} $x_{C}$ and the \emph{outliers} $O_{C}$ for each almost-clique $C \in \cC$. The leader $x_{C}$ is the node in $C$ with minimum slackability. As we can determine the slackability of all the nodes (in $C$) by using $O(1)$ rounds in parallel (see \Cref{lem:computing_parameters}), we can determine $x_{C}$ for each almost-clique $C \in \cC$ in parallel in $O(1)$ rounds. The slackability of an almost-clique $C$ is defined as the slackability of its leader $x_C$. $C$ is said to be a \emph{low slack} almost-clique if its slackability is at most $\ell=\log^{2.1} \Delta$. Recall that these are the almost-cliques (with low slackability) for which we need to find put-aside sets.

Let $O_C$ be the set of \emph{outliers} in an almost-clique $C$. $O_C$ is comprised of the union of the $\frac{\max\{{d(x_C), \size{C}}\}}{3}$ nodes with the fewest common neighbors with $x_C$, the $\frac{\size{C}}{6}$ nodes of largest degree, and the nodes in $C$ that are not neighbors of $x_C$. The nodes in $C$ that are not outliers are called \emph{inliers} of $C$ (denoted by $I_C$). As each almost-clique $C$ is stored in one machine and we can determine the degrees of the nodes in $O(1)$ rounds, we can clearly find $O_C$ and $I_C$ for all almost-cliques in $O(1)$ rounds. 

\end{proof}

\section{Algorithm for $\Delta$-coloring}
\label{sec:Delta-coloring}

In this section we show an application of \Cref{thm:main:deter} and present a $O(\log\log\log n)$-round \MPC algorithm for the related problem of $\Delta$-coloring (\Cref{thm:delta_coloring}). The algorithm is randomized, and uses \Cref{thm:main:deter} repeatedly as a subroutine.

We remark that we can only find a $\Delta$-coloring when $\Delta \geq 3$ in $o(\log n)$ rounds in low-space \MPC. This is because there is a long-standing conjecture that we cannot distinguish a cycle of length $n$ from two cycles of length $n/2$ in $o(\log n)$ rounds of sublinear \MPC: it is therefore not possible to tell in $o(\log n)$ sublinear \MPC rounds whether a $2$-regular graph with $n=4k+2$ nodes is a single ($2$-colorable) cycle of even length, or two (not $2$-colorable) cycles of odd length. We note that the assumption that $\Delta \geq 3$ is standard in all distributed literature on this problem.

In the following subsections, we show that we can simulate a version of the algorithm given by Fischer \etal (\cite[Theorem 1.1]{FHM23}), which gives an algorithm to $\Delta$-color graphs with {$\Delta = \omega(\log^{3} n)$} in the \CONGEST model, to give an algorithm for the same range of $\Delta$ in $O(\log\log\log n)$ \MPC rounds.

This \CONGEST algorithm fails for $\Delta = o(\log^3 n)$, because certain claims no longer succeed with sufficiently high probability.
However, for graphs which do not satisfy this condition (i.e.~those for which $\Delta = o(\log^{3} n)$), we can in any case find a $\Delta$-coloring in $O(\log\log\log n)$ rounds, by exploiting graph exponentiation to collect our $O(\poly \log\log n)$-hop neighborhoods, and then simulating an earlier \LOCAL algorithm given by \cite{DBLP:journals/dc/GhaffariHKM21}.

\begin{remark}
\label{rem:low_degree_delta_coloring}
    There is a randomized $O(\log\log\log n)$-round low-space \MPC algorithm which, w.h.p., finds a $\Delta$-coloring of $G$, for any $n$-node graph $G$ with maximum degree $\Delta$ such that $\Delta \geq 3$, $\Delta = O(\log^{3} n)$, and $G$ does not contain a clique of size $(\Delta + 1)$.
    The algorithm uses $O(m+n^{1+\phi})$ global space.
\end{remark}
\begin{proof}
    Ghaffari \etal give a randomized \LOCAL algorithm for the problem of $\Delta$-coloring when $\Delta \geq 3$ \cite[Theorem 1]{DBLP:journals/dc/GhaffariHKM21}. This algorithm succeeds w.h.p.~and runs in $O(\sqrt{\Delta \cdot \log \Delta} \cdot \log^* \Delta \cdot \log^2\log n)$ rounds. We note that the complexity of this algorithm is $O(\poly\log\log n)$ when $\Delta = O(\poly \log n)$.
    
    \LOCAL algorithms which run in $k$ rounds can be simulated in \MPC by collecting the $k$-hop neighborhood of each node and simulating the choices of the nodes. Therefore, simulating the algorithm of Ghaffari \etal requires each node to collect its $O(\poly \log\log n)$-hop neighborhood. This can be achieved using graph exponentiation in $O(\log(\poly\log\log n)) = O(\log\log\log n)$ rounds, using at most $(\log^{3} n)^{O(\poly\log\log n)} = o(n^\phi)$ extra global space per node, for an arbitrary positive constant $\phi$. 
\end{proof}

We also give the following lemma, which much of the rest of this section will be dedicated to proving:

\begin{lemma}
\label{lem:high_degree_delta_col}
    There is a randomized $O(\log\log\log n)$-round low-space \MPC algorithm which, w.h.p., finds a $\Delta$-coloring of $G$, for any $n$-node graph $G$ with maximum degree $\Delta$ such that $\Delta \geq 3$, $\Delta = \omega(\log^{3} n)$, and $G$ does not contain a clique of size $(\Delta + 1)$.
    The algorithm uses $O(m+n^{1+\phi})$ global space for an arbitrary constant $\phi \in (0,1)$.
\end{lemma}

Note that these two results together imply \Cref{thm:delta_coloring}:

\begin{proof}[Proof of \Cref{thm:delta_coloring}]
    We can determine the value of $\Delta$ for our input graph in $O(1)$ rounds using sorting and prefix sum. Then we use either the algorithm of \Cref{rem:low_degree_delta_coloring} or \Cref{lem:high_degree_delta_col}, depending on the value of $\Delta$.
\end{proof}

\subsection{Overview of $\Delta$-coloring algorithm of \cite{FHM23}}

We give a brief overview of the $\Delta$-coloring algorithm of \cite{FHM23} for the \LOCAL and \CONGEST models of distributed computing {when $\Delta = \omega(\log ^3 n)$}. In general, $\Delta$-coloring is much harder than $(\Delta+1)$-coloring in a distributed or parallel setting: in part because each node has one fewer color, but primarily because while in $(\Delta+1)$-coloring a valid partial coloring can be extended to a valid full coloring, this is not the case for $\Delta$-coloring. 

Because of this, one has to be much more careful when coloring nodes in a $\Delta$-coloring problem: it must be ensured that all nodes can be given some ``slack'' so that they can be colored.
To achieve this, Fischer \etal first perform an almost-clique decomposition tailored to the problem of $\Delta$-coloring. They then categorise the almost-cliques into several different types, depending on how easy it is to generate slack for nodes inside them and what type of external neighbors they have. Nodes are then classified into a series of sets, depending on which type of almost-clique they belong to, and whether they have many neighbors to particular almost-cliques.

Finally, these sets of nodes are colored, in a particular order. Most sets of nodes need to be colored using a $(\deg + 1)$-list-coloring algorithm.

We break down our proof of \Cref{lem:high_degree_delta_col} as follows.
In \Cref{sec:delta-acd} we define a useful ACD for $\Delta$-coloring and show that it can be computed in $O(1)$ rounds of low-space \MPC.
In \Cref{sec:classifying_nodes} we show that we can perform the necessary classifications of almost-cliques and nodes which the algorithm of Fischer \etal requires in $O(1)$ \MPC rounds.
Finally, in \Cref{sec:implementation_of_coloring} we show that the coloring procedures can be implemented in $O(\log\log\log n)$ \MPC rounds and conclude the proof of \Cref{lem:high_degree_delta_col}.



\subsection{Almost-clique decomposition in $O(1)$ rounds}
\label{sec:delta-acd}

We begin by defining an ACD which will be useful in $\Delta$-coloring. {This ACD was originally given by Fischer \etal \cite{FHM23}}. We note that a different ACD is required for $\Delta$-coloring than for the \DILC problem (as used in \Cref{sec:acd}). This ACD is to some extent simpler, as all almost-cliques are approximately of the same size, and there is no $V_\text{uneven}$.

\begin{definition}[$\Delta$-{\sc ACD}]\label{defi:acd-delta}
Let $G(V,E)$ be a graph and $\veps \in (0,1/20)$ be a constant. A partition of $V$ into $V_{\text{sparse}}$ and  $V_{\text{dense}}$, such that $V_{\text{dense}}$ is further partitioned into $C_1,\ldots,C_k$, is said to be an \emph{almost-clique decomposition} ({\sc ACD}) if
\begin{enumerate}
\item[(i)] For each node in $v \in V_{\text{sparse}}$, $\frac{1}{\Delta}\left[{\Delta \choose 2}-m(N(v))\right]=\Omega(\eps^2\Delta)$; 
\item[(ii)] For every $i \in [k]$, $(1-\eps)\Delta \leq \size{C_i} \leq (1+3\eps)\Delta$;
\item[(iii)] Each dense node $v$ has at least $(1-4\veps)\Delta$ neighbors in $C(v)$, i.e., $\size{N(v) \cap C(v)} \geq (1-4\veps)\Delta$;
\item[(iv)] For each node $u \notin C_i$, $\size{N(u) \cap C_i} \leq (1-2\eps)\Delta$.
\end{enumerate}
\end{definition}

{For the description of the algorithm for computing a {\sc $\Delta$-ACD}, we need some notions in the following definition.}

\begin{definition}[{similarity, friend and density} ~\cite{HKMT20}]
 Two nodes $u,v \in V$ are \emph{$\eta$-similar} if $\size{N(u) \cap N(v)}\geq (1-\eta)\Delta$. Two $\eta$-similar nodes $u$ and $v$ such that $\{u,v\} \in E$ are said to be \emph{$\eta$-friends}. A node $v$ is said to be \emph{$\eta$-dense} if $v$ has at least $(1-\eta)\Delta$ $\eta$-friends.
\end{definition}

We now give an algorithm for computing the ACD defined in \Cref{defi:acd-delta}. A similar version of this algorithm was originally given by Halld{\'o}rsson \etal for $(\Delta + 1)$-coloring in the \CONGEST model \cite[Algorithm 7]{HKMT20} and subsequently adapted by \cite{FHM23} for use in $\Delta$-coloring. We show later that it can also be implemented in \MPC with low local space, in $O(1)$ rounds.

\begin{algorithm}[H]
	\caption{\textsc{$\Delta$-ACD-Computation}$(G,\eps)$}
	\label{alg:acd-delta}

 Let $\eta=\eps/27$;

Sample each node with probability $1/\sqrt{\Delta}$ and add to a set $S$.

For each $v \in V$, select a node from $N(v) \cap S$ u.a.r., and add it to multi-set $T$ with probability $\min\{1,\size{S \cap N(v)}/2\sqrt{\Delta}\}$.

For $v \in S$, if there is some $u \in N(v) \cap S$ such that $u$ has multiplicity $(1-2\eta)\sqrt{\Delta}/2$ in $T$, then $v$ is said to be \emph{approximately $\eta$-similar} to $u$.

$F = \{\{u,v\} \in E : \text{$u$ is approximately $\eta$-similar to $v$}\}$

\mbox{$S_{\text{dense}}=\{v \in S: v~\mbox{is incident to at least $(1-2\eta)\sqrt{\Delta}$ edges in $F$}\}$.}

Each $v \in S_{\text{dense}}$ selects a $u \in S_{\text{dense}}$ such that $\{u,v\}\in F$ and the ID of $u$ is minimized, and adds $u$ to a multiset $U$.

For each $v \in V$, if there exists a $u \in N(v)$ that has multiplicity at least $(1-11\eta)\sqrt{\Delta}$ in $U$, then the AC-ID of $v$ is the same as the ID of $u$. Otherwise, $v$ is added to  a set $V'$.\label{algo13:step8}


Let $C_1', \ldots, C_{t}'$ be sets of nodes such that nodes in each $C_i'$ have the same AC-ID. For $i \in [t]$, if $\size{C_i'}< (1-\eta)\Delta$ or there is an $x \in C_i'$ such that $\size{N(x) \cap C_i'} < (1-\eta)\Delta$, then remove the AC-ID of nodes in $C_i'$ and add them to $V'$.\label{algo13:step9}


Let $v \in V'$. If $v$ has at least $(1-108\eta)\Delta$ neighbors in some $C_i'$, then give $v$ the same AC-ID as that of $C_i'$.\label{algo13:step10}

Output $C_1, \ldots, C_{k}$ such that the nodes in a particular $C_i$ have the same AC-ID; $V_{\text{dense}}=C_1 \sqcup \ldots \sqcup C_k$; and $V_{\text{sparse}}=V \setminus V_{\text{dense}}$.\label{algo13:step11}

\end{algorithm}

\begin{theorem}\label{lem:main-delta}
The output produced by \textsc{$\Delta$-ACD-Computation} is a $\Delta$-{\sc ACD} as defined in Definition~\ref{defi:acd-delta}. Moreover, \textsc{$\Delta$-ACD-Computation} can be implemented in $O(1)$ \MPC rounds.
\end{theorem}

To prove the above theorem, we use the following guarantee.

\begin{lemma}[Lemma~9.7 in \cite{HKMT20}]
\label{lem:hal-delta} 
With high probability, the following hold.
\begin{itemize}
    \item[(i)] Every $\eta/4$-dense node has an AC-ID and, for $i \in [t]$, $C_i'$ has a $\eta/4$-dense node. That is, for every node in $v \in V'$, we have $\frac{1}{\Delta}\left[{\Delta \choose 2}-m(N(v))\right]=\Omega(\eta^2\Delta)$;
    \item[(ii)] For $i \in [t]$, $ (1-\eta)\Delta \leq \size{C_i'}\leq (1+25\eta)\Delta$;
    \item[(iii)] For $i \in [t]$ and $v \in C_i'$, $\size{N(v) \cap C(v)} \geq (1-27\eta)\Delta$.
\end{itemize}
\end{lemma}

\begin{lemma}[Properties of the output produced by Algorithm~\ref{alg:acd-delta}]
\label{lem:acd-brook}
The output produced by \textsc{$\Delta$-ACD-Computation} is an {\sc ACD} as defined in Definition~\ref{defi:acd-delta}.
\end{lemma}

\begin{proof}
The proof is similar to the proof of Lemma 2.1 of~\cite{FHM23}; we include it here for completeness.
   Consider $V'$ and $C_1',\ldots,C_t'$ in Steps~\ref{algo13:step8} and~\ref{algo13:step9} of Algorithm~\ref{alg:acd-delta}, respectively. Note that they satisfy all three properties as mentioned in Lemma~\ref{lem:hal-delta} with high probability. Now, consider the way the algorithm finds $C_1,\ldots,C_t$ and $V_{\text{sparse}} \subseteq V'$ in Steps~\ref{algo13:step10} and~\ref{algo13:step11}.

  For the proof of (i) of Definition~\ref{defi:acd-delta}, from Lemma~\ref{lem:hal-delta} (i), note that for each node $v \in V'$ we have $\frac{1}{\Delta}\left[{\Delta \choose 2}-m(N(v))\right]=\Omega(\eta^2\Delta)$. As $V_{\text{sparse}} \subseteq V'$ and $\eta=\eps/27$, each node $v \in V_{\text{sparse}}$ satisfies $\frac{1}{\Delta}\left[{\Delta \choose 2}-m(N(v))\right]=\Omega(\eps^2\Delta)$.

  For the proof of (ii), we first show an upper bound on $\size{C_i \setminus C_i'}$. By Lemma~\ref{lem:hal-delta} (iii), the number of neighbors of any node in $C_i'$ that are outside $C_i'$ is at most $27\eta\Delta$. Again by Lemma~\ref{lem:hal-delta} (ii), the total number of edges having exactly one end point inside $C_i'$ is at most $\size{C_i'} \cdot 27 \eta \Delta \leq (1+25 \eta)27 \eta \Delta^2$. By the definition of the nodes in $C_i \setminus C_i'$, as each node in $C_i \setminus C_i'$ has at least $(1-108\eta)\Delta$ neighbors in $C_i'$, $\size{C_i \setminus C_i'}$ is at most $(1+25 \eta)27 \eta \Delta/(1-108\eta) \leq 50\eta \Delta$. So, $\size{C_i}=\size{C_i'}+\size{C_i \setminus C_i'}\leq (1+25 \eta)\Delta +50\eta \Delta=(1+75\eta)\Delta<(1+3\eps)\Delta$. The lower bound holds since $C_i' \subseteq C_i$ and hence $\size{C_i} \geq \size{C_i'} \geq (1-\eta)\Delta \geq (1-\eps)\Delta$.

  Now, we prove (iii) of Definition~\ref{defi:acd-delta}. Due to (iii) of Lemma~\ref{lem:hal-delta} and the way the algorithm adds nodes to $C_i'$ to form $C_i$, for any node $v \in C_i$, $\size{N(v) \cap C(v)} \geq (1-108\eta)\Delta = (1-4\eps)\Delta$.

  For the proof of (iv) of Definition~\ref{defi:acd-delta}, consider the definition of $V_{\text{sparse}}$. Each node in $V_{\text{sparse}}$ has at most $(1-108\eta)\Delta$ neighbors in $C_i'$. Also, we have argued that $\size{C_i \setminus C_i'}\leq 50 \eta \Delta$. So, each node in $V_{\text{sparse}}$ has at most $(1-58\eta)\Delta < (1-2\eps)\Delta$ neighbors in $C_i$.
\end{proof}

We briefly describe the preconditions of our \MPC and give some notation:

\begin{remark}
\label{rem:mpc_initial_conditions}
    At the beginning of an \MPC algorithm whose input is a graph $G = (V, E)$, we may assume that the edges are sorted lexicographically across machines: i.e., each machine has a contiguous subsequence of the sorted edges.
    If there is an edge $\{x, y\} \in E$, we assume that both pairs $(x, y)$ and $(y, x)$ are present.
    We say that the pair $(x, y)$ is incident to $x$, but that $(y, x)$ is not. 
    
    We say a machine $M(v)$ is responsible for a node $v$ if the lexicographically first edge incident to $v$ is on $M(v)$. Machines can figure out which nodes they are responsible for in $O(1)$ rounds using some simple communication with the machines which have the adjacent data.
    
    We note that if these conditions are not present, they can be enforced in $O(1)$ rounds using sorting.
\end{remark}

We now give a lemma similar to \Cref{lem:mpc_subroutines_low_degree}, except these subroutines work regardless of the maximum degree of the input graph:

\begin{lemma}
\label{lem:mpc_capabilities}
    Given the preconditions and definitions in \Cref{rem:mpc_initial_conditions}, we can perform each of the following tasks in $O(1)$ rounds:
    
    \begin{itemize}
        \item \textbf{Broadcasting a message to all neighbors:} Given a node $v$, we can append a message $\mu$ to each edge $(u, v)$ for each $u \in N(v)$. This can be done for all nodes in parallel, and if the messages are all of size $\mu$ the procedure requires a $O(\mu)$ factor extra global space.
        \item \textbf{Uniformly at random selecting a neighbor:} Each node $v$ can randomly select a neighbor $u \in N(v)$ and broadcast its choice to all neighbors. This selection can be restricted to a subset of neighbors.  
    \end{itemize}
\end{lemma}
\begin{proof}
    We start by describing the useful subroutine of ``broadcasting a message'' $\mu$ from a node $u$ to all neighbors of $u$. First, we append $\mu$ to all pairs incident to $u$: for example if there was a pair $(u, v)$ this pair would now be a tuple $(u, v, \mu)$. Then we swap the first two elements of the tuple (our example would become $(v, u, \mu)$). Finally, we sort these tuples lexicographically: we now have a state similar to our preconditions in \Cref{rem:mpc_initial_conditions}, except all tuples incident to $u$ are ``tagged'' with some additional information from that neighbor of $u$. We remark that this operation just requires sorting and can therefore be achieved in $O(1)$ rounds using a factor of $O(\mu)$ extra global space.
    
    It is similarly easy to see that selecting a uniformly random neighbor of each node can be achieved in $O(1)$ rounds: append a random value to each edge and sort all edges lexicographically by their first endpoint and this random value, then pick the first incident edge. Again this just requires sorting and so takes $O(1)$ rounds, and this can be done for all the nodes in parallel. We also note that this can be easily extended to selecting at random from a subset of neighbors (by including membership of the subset in the sorting criteria).
\end{proof}

\begin{lemma}[MPC Implementation of ACD Decomposition]
\label{lem:mpc-acd-delta}
    \Cref{alg:acd-delta} can be implemented in $O(1)$ rounds on an MPC with $O(n^\phi)$ local space and $O(m+n)$ global space. 
\end{lemma}
\begin{proof}
We argue for each line of \Cref{alg:acd-delta} that some mix of sorting, prefix sum, local computation, and subroutines from \Cref{lem:mpc_capabilities} suffices.
\begin{itemize}
    \item Line 2 can be decided for each $v$ by $M(v)$ without communication.
    
    \item Lines 3 and 7 are subroutines similar to the uniformly random selection in \Cref{lem:mpc_capabilities} (although line 7 does not require randomness). Suppose node $u$ selects node $v$ to add to $T$ (resp.\ $U$): $M(u)$ can broadcast this fact to $M(v)$, and then $M(v)$, using prefix sum, can compute the multiplicity of $v$ in $T$ (resp.\ $U$) and broadcast it to all neighbors of $v$.
    
    \item Lines 4, 6, 8, 9, and 10 require us to calculate for each node how many of its neighbors satisfy some condition. This can be done by all nodes first broadcasting whether they meet the condition (as in \Cref{lem:mpc_capabilities}), followed by prefix sum to count the number of a node's neighbors which meet the condition.\\
    Consider line 4 as an example. First, all nodes broadcast which neighbor they selected in line 3. Then, for each node $v$, $M(v)$ knows how many of $v$'s neighbors selected $v$ in line 3, and therefore what the multiplicity of $v$ in $T$ is. $M(v)$ can then broadcast to its neighbors whether $v$ has sufficient multiplicity.
    
    \item Line 5 requires us to calculate for each node whether neighbors meet two conditions (membership of $S$ and multiplicity in $T$). This can be done locally once machines responsible for neighbors broadcast this information.
\end{itemize}
\end{proof}

Finally, we observe that we have shown both constituent parts of \Cref{lem:main-delta}.

\begin{proof}[Proof of Lemma~\ref{lem:main-delta}]
    Follows immediately from \Cref{lem:acd-brook} and \Cref{lem:mpc-acd-delta}.
   
\end{proof}
\subsection{Classifying nodes in sublinear \MPC}
\label{sec:classifying_nodes}

After an ACD is performed in the algorithm of \cite{FHM23}, nodes are partitioned into a series of sets, which have to be ($(\deg+1)$-list-) colored in a particular order in order to ensure that each node has enough slack.
The classification of nodes into these sets is based on several factors: the most significant is the \emph{type} of almost-clique which the node is in. Almost-cliques are divided into types based on how easy they are to color.
In this subsection we discuss the classification of almost-cliques (and subsequently, of nodes), and how this can be done in \MPC.

We first give the classification of almost-cliques specified by Fischer \etal \cite{FHM23}:

\begin{definition}[almost-clique Classification (\cite{FHM23})]
\label{def:ac_classification}
A node is \emph{special} to an almost-clique if it is outside the almost-clique and has at least $\Delta^{2/3}/2$ neighbors in the AC. A node is \emph{simplicial} if its neighborhood is a clique. Almost-cliques are characterized as follows:
\begin{itemize}
    \item An \emph{easy} almost-clique is one that contains a simplicial node or a non-edge.
    \item A \emph{difficult} almost-clique is an almost-clique which is: not easy; has a special neighbor; and $|C| \geq \Delta - \Delta^{1/3}$. Each difficult clique picks a special neighbor $v$ and is then assigned a subtype in the following way:
    \begin{itemize}
        \item $v$ is an \emph{escape} if it is picked by at least two almost-cliques. Almost-cliques which pick escapes as their special neighbor are \emph{runaway} almost-cliques.
        \item $v$ is a \emph{protector} if it is only picked by one AC. Almost-cliques which pick protectors as their special neighbor are \emph{guarded} almost-cliques.
    \end{itemize}
    \item A \emph{nice} almost-clique is easy or contains a picked special node (of another AC).
    \item An \emph{ordinary} almost-clique is neither difficult nor nice.
\end{itemize}
\end{definition}

We simplify the \MPC implementation of classifying almost-cliques by observing that the following is an equivalent definition of an ``easy'' almost-clique.
\begin{observation}[Alternative Characterisation of Easy Cliques]
\label{obs:alternative_easy_cliques}
    An almost-clique $C$ is easy if and only if either of the following is true:
    (i) there is a non-edge in $C$; or
    (ii) $C$ is a clique, and there is a node in $C$ with no external degree.
\end{observation}
\begin{proof}
    It suffices to show that if there is no non-edge in the almost-clique, then all simplicial nodes in the almost-clique have no external degree. We prove this by contradiction.

    Let $C$ be an almost-clique with no non-edge (i.e.~$C$ is a clique). Suppose there is a simplicial node $v$ with an external neighbor $w$. Then $w \not \in C$, and (since $v$ is simplicial) there is an edge between $w$ and all nodes of $C$. But this contradicts Definition~\ref{defi:acd-delta}.(iv), implying $C$ is not an almost-clique. Therefore no simplicial node in $C$ can have any external degree.
\end{proof}

Using this classification of almost-cliques, Definition 3.2 of \cite{FHM23} requires that the nodes of the graph are partitioned into the following sets:
\begin{itemize}
    \item $\mathcal{P}$: The set of protector nodes
    \item $\mathcal{E}$: The set of escape nodes
    \item $\mathcal{V}_*$: Nodes in $V_{\text{sparse}}$, excluding those in $\mathcal{P} \cup \mathcal{E}$
    \item $\mathcal{O}$: Nodes in ``ordinary'' almost-cliques
    \item $\mathcal{R}$: Nodes in ``runaway'' almost-cliques
    \item $\mathcal{N}$: Nodes in ``nice'' almost-cliques, excluding any in $\mathcal{P} \cup \mathcal{E}$
    \item $\mathcal{G}$: Nodes in ``guarded'' almost-cliques
\end{itemize}

\begin{lemma}\label{lem:group}
    Given an ACD $G = V_{sparse} \sqcup C_1 \sqcup \dots \sqcup C_k$, we can classify all nodes in $G$ into one of the sets from \cite[Definition~3.2]{FHM23} in $O(1)$ \MPC rounds.
\end{lemma}
\begin{proof}
    First, we will show that we can classify the almost-cliques $C_1\dots C_k$ according to Definition~\ref{def:ac_classification}. For ease of presentation we will assume that each almost-clique has a dedicated series of machines responsible for classifying it, and consider the classification of some almost-clique $C$ which has $n$ nodes and $m$ edges (including edges incident to $C$). We can classify this almost-clique in $O(1)$ \MPC rounds, using $O(n^\delta)$ local space and $O(n+m)$ global space as follows:
    \begin{itemize}
        \item \textbf{Easy almost-cliques:} We make use of Observation~\ref{obs:alternative_easy_cliques}, and check the two conditions in order.\\
        To check condition~(i) we can add a boolean value to an edge to indicate if both endpoints are in $C$. Let $m'$ be the number of edges with both endpoints in $C$: to check whether there is a non-edge it suffices to check whether $m' = \binom{n}{2}$. This can be done using sorting and prefix-sum in $O(1)$ \MPC rounds. Note that it is easy to extend this computation to \emph{find} a non-edge (find a node in $C$ with fewer than $|C|-1$ neighbors in $C$, and find a node in $C$ which it is not connected to).\\
        If there is no non-edge in $C$ then we need to check condition~(ii): whether there is any node in $C$ with no external degree. This can be done using sorting and an application of prefix sum, again in $O(1)$ \MPC rounds.

        \item \textbf{Difficult almost-cliques:} We have already checked whether $C$ is easy, and it's also easy to check whether $C$ is of sufficient size.\\
        It remains to check whether $C$ has a special neighbor, and if so to further classify $C$ as a ``guarded'' or a ``runaway'' almost-clique. We can sort the edges which are incident to one node in $C$ and one outside and count the number of instances of each endpoint outside $C$ (using prefix sum) to determine if any of them are special.\\
        If we do not find a node outside $C$ with enough neighbors in $C$ then $C$ is not difficult. If we do then suppose $v$ is the special node which we pick arbitrarily. We move our frame of reference temporarily to the computation across all almost-cliques. Each almost-clique can send its picked special node to a dedicated series of machines. These machines can sort the special nodes which they receive and determine whether each special node was picked by more than one almost-clique using prefix sum. Then these machines can identify whether each special node is a protector or an escape, and send this information back to the almost-cliques that submitted the special nodes.
       

        \item \textbf{Nice almost-cliques:} $C$ knows whether it is easy, and the dedicated machines which computed the special nodes can inform the machines responsible for $C$ as to whether $C$ contains any picked special nodes.
       

        \item \textbf{Ordinary almost-cliques:} $C$ knows from the previous steps whether it is difficult or nice. If it is neither, it is ordinary.
    \end{itemize}

    From the almost-clique decomposition (\Cref{lem:mpc-acd-delta}), for each node $v$, $M(v)$ knows either which almost-clique $v$ is in, or knows that $v$ is in $V_{sparse}$. Following the computation above, the machines responsible for classifying $C$ can broadcast its type, and so: if $v$ is in an almost-clique, $M(v)$ also knows which type of almost-clique it is in. $M(v)$ also knows whether $v$ was picked as a special node by any almost-clique, and if so what type of special node $v$ is.
    
    Further, if $v$ is in a nice almost-clique $C$, it knows whether $C$ contains a special node or a non-edge (this matters for \Cref{algo:color-n}). This is because special nodes and non-edges can be identified by the arguments above, and if $M(v)$ knows that $v$ is a special node or the endpoint of a non-edge in its almost-clique, this information can be broadcast to all nodes of $C$ in $O(1)$ rounds (recall that $C$ has constant diameter).

    This computation is therefore sufficient to classify the nodes into $\mathcal{O}$, $\mathcal{R}$, $\mathcal{N}$, or $\mathcal{G}$ immediately. Since each machine responsible for a node knows whether its node is a picked special node, they can check membership of $\mathcal{P}$ or $\mathcal{E}$. Finally, as machines also know if their nodes are in $V_{sparse}$, they can check membership of $\mathcal{V}_*$.
\end{proof}



\subsection{Full $\Delta$-coloring algorithm} 
\label{sec:implementation_of_coloring}

Algorithm~\ref{algo:Delta-main} is the high level description of the $\Delta$-coloring algorithm in low-space \MPC when $\Delta=\omega(\log^{3} n)$. For formatting reasons, we defer the pseudocode of the subroutines to \Cref{sec:additional_subroutines}. The description is the same as that of the algorithm for $\Delta$-coloring in the {\sc Congest} model~\cite{FHM23}. Here, our contribution is to argue that the same can be implemented in sublinear \MPC. The algorithm colors the nodes in a specific way such that it breaks the coloring procedure into constant many (Deg+1)-List-Coloring instances. Note that each such (Deg+1)-List-Coloring instance can be solved in $O(\log \log  \log  n)$ rounds in sublinear \MPC, thanks to \Cref{thm:main:deter}.
\begin{remark}
In the following algorithm and subsequent description, when we color a node $v$ with color $c$, $v$ is considered to be deleted from the graph and $c$ is removed from the palettes of all neighbors of $v$.

When we say we color a set of nodes $U \subseteq V$, we mean that we color the nodes of the graph $G[U]$.

Initially, all nodes have the palette $[\Delta]$.
\end{remark}

\begin{algorithm}[H]
\caption{$\Delta$-{\sc Coloring-LowSpace}}  \label{algo:Delta-main}
Compute an ACD as defined in Definition~\ref{defi:acd-delta}.

Classify the nodes into different types using Lemma~\ref{lem:group}.

Run {\sc SlackGeneration} (Algorithm~\ref{algo:slack}) on $\mathcal{V}_* \sqcup \mathcal{O} \sqcup \mathcal{R}$.

{\sc Color-$V_*$} (Algorithm~\ref{algo:color-v-star}): colors $V_* =V_{\text{sparse}} \setminus (\mathcal{P} \cup \mathcal{E})$.

{\sc Color-$\mathcal{O}$} (Algorithm~\ref{algo:color-o}): colors $\mathcal{O}$.

{\sc Color-$\mathcal{R}$} (Algorithm~\ref{algo:color-r}): colors $\mathcal{R}$.

{\sc Color-$\mathcal{N}$} (Algorithm~\ref{algo:color-n}): colors $\mathcal{N}$.

{\sc Color-$\mathcal{GP}$} (Algorithm~\ref{algo:color-g}): colors $\mathcal{G} \cup \mathcal{P}$.

{\sc Color-$\mathcal{E}$} (Algorithm~\ref{algo:color-e}): colors $\mathcal{E}$.
\end{algorithm}


Finally, we prove \Cref{lem:high_degree_delta_col}, arguing that \Cref{algo:Delta-main} can be implemented in $O(\log\log\log n)$ rounds of \MPC.
\begin{proof}[Proof of \Cref{lem:high_degree_delta_col}]
Correctness of the algorithm follows from arguments made in \cite{FHM23}.
Regarding implementation: we look at all of the steps of \Cref{algo:Delta-main} in turn:

\begin{enumerate}
    \item By \Cref{lem:mpc-acd-delta}, we can compute an ACD in \MPC in $O(1)$ rounds.
    \item By \Cref{lem:group}, we can classify all nodes into their respective sets in $O(1)$ \MPC rounds.
    \item \Cref{algo:slack} requires each machine responsible for a node to make a random choice and broadcast it to neighbors: by \Cref{lem:mpc_capabilities} this can be done in $O(1)$ rounds.
    \item \Cref{algo:color-v-star} just requires a single use of \Cref{thm:main:deter}.
    \item \Cref{algo:color-o}: $M(v)$ can determine whether $v$ has slack by arguments similar to those in \Cref{lem:computing_parameters} (we no longer have $\Delta < \sqrt{\lspace}$, but slack can still be computed using prefix sum). The remainder of the algorithm consists of two uses of \Cref{thm:main:deter}.
    \item \Cref{algo:color-r}: $M(v)$ knows whether $v$ is a member of a runaway clique and whether it is an escape node. Machines can broadcast whether their node is an escape node, and subsequently determine whether their node is adjacent to its clique's escape node (and hence in $\mathcal{Y}$). The remainder of the algorithm is two uses of \Cref{thm:main:deter}.
    \item \Cref{algo:color-n}: By \Cref{lem:group}, machines know which subtype of nice almost-clique they belong to. Then, depending on which subtype a node's almost-clique is, we do one of the following:
        \begin{enumerate}
            \item \Cref{algo:color-n-1}: Similar to \Cref{algo:color-r}, machines can determine whether their node is adjacent to a special node. The remainder of the algorithm is two uses of \Cref{thm:main:deter}.
            \item \Cref{algo:color-n-2}: By \Cref{lem:group}, machines know whether their node is the simplicial node in that node's almost-clique with the lowest ID. The remainder of the algorithm is two uses of \Cref{thm:main:deter}.
            \item \Cref{algo:color-n-3}: By \Cref{lem:group}, we have identified a non-edge in each nice almost-clique which has one. We can compute the union of the palettes of the two endpoints of each non-edge using sorting, and then gather all of these non-edges on a dedicated series of machines.\\
            Machines can broadcast whether the nodes they are responsible for are endpoints of a non-edge in their almost-clique, and we can then collect all edges for which both endpoints are endpoints of a non-edge, and send those to the same dedicated series of machines. Those machines can determine which non-edges are adjacent and create the virtual graph, and call the D1LC algorithm on that graph (\Cref{thm:main:deter}) and color the endpoints as described.\\
            Then, machines can check whether their nodes are adjacent to both endpoints of the non-edge of the almost-clique containing their node, and after two more uses of \Cref{thm:main:deter}, we are done.
        \end{enumerate}
    \item \Cref{algo:color-g}: We can identify a non-neighbor of the protector node of each clique using sorting. After this is done, the remaining steps of this algorithm mirror those of \Cref{algo:color-n-3}.
    \item \Cref{algo:color-e} just uses \Cref{thm:main:deter} once.
\end{enumerate}
    
Since we can implement each of its steps in $O(\log\log\log n)$ rounds in the stated local and global space bounds, we can implement \Cref{algo:Delta-main} in $O(\log\log\log n)$ rounds: this concludes the proof of the lemma.
\end{proof}



\section{Preliminaries (and overview of \cite{hknt_local_d1lc})}

\label{sec:overview_of_hknt}

In this section, we give an overview of the recent work of \cite{hknt_local_d1lc}. Among several other contributions, \cite{hknt_local_d1lc} give an algorithm which, in the \LOCAL model of distributed computing, given an input instance of $(\deg + 1)$-list coloring {for a graph $G$}, colors all nodes of $G$ having degrees at least $\log ^7 n$  in $O(\log^* n)$ rounds. We give an overview of their algorithm here, because our algorithm in \MPC to color all nodes in the degree range $\left[\log^7 n, n^{7\delta}\right]$ uses steps of the algorithm in~\cite{hknt_local_d1lc}, where $\delta < \phi$ (recall that $\phi$ is our local space parameter, i.e., $\lspace=O(n^\phi)$). In particular, \Cref{sec:randomized-d1lc} argues that we can simulate this algorithm of {\cite{hknt_local_d1lc}} in sublinear \MPC, and \Cref{sec:deterministic_d1lc} argues that we can do so deterministically.

The algorithm of \cite{hknt_local_d1lc} (for the degree range $[\log^7 n, n]$) requires several careful arguments which rely on nodes being able to calculate certain parameters, in particular the following:

\begin{definition}[Parameters from \cite{hknt_local_d1lc}]
\label{def:parameters}
The following node-parameters are used in the coloring algorithm of \cite{hknt_local_d1lc}:
\begin{itemize}
\item \emph{Slack} $\slack{v} = p(v) - d(v)$: The difference between the size of the palette of a node and its degree.
\item \emph{Sparsity} $\sparsity{v} = \frac{1}{d(v)} \cdot \left[ \binom{d(v)}{2} - m(N(v)) \right]$~\footnote{{$m(N(v))$ denotes the number of edges in the subgraph induced by $N(v)$}.}: The proportion of non-edges between neighbors of $v$. 
\item \emph{Disparity} $\disparity{u}{v} = |\Psi(u) \setminus \Psi(v)| / |\Psi(u)|$: The (proportional) difference between the palettes of $u$ and $v$.
\item \emph{Discrepancy} $\discrepancy{v} = \sum_{u \in N(v)} \disparity{u}{v}$: The sum of disparities between $v$ and its neighbors.
\item \emph{Unevenness} $\unevenness{v} = \sum_{u \in N(v)} \frac{\max(0, d(u) - d(v))}{d(u) + 1}$: A quantity that relates to how many of the neighbors of $v$ have much higher degree than $v$.
\item \emph{Slackability} $\slackability{v} = \discrepancy{v} + \sparsity{v}$, \emph{Strong Slackability} $\sslackability{v} = \unevenness{v} + \sparsity{v}$: How easy it is to create slack for a node.
\end{itemize}
\end{definition}

The coloring algorithm for the degree range $\left[\log^7 n, n\right]$ given in \cite{hknt_local_d1lc} has the form outlined as  \Cref{alg:color_middle_degrees}. As already mentioned, we use that algorithm to color the nodes with degrees in $[\log^7 n, n^{7\delta}]$. We will discuss the subroutines of \Cref{alg:color_middle_degrees} in subsequent subsections from \Cref{sec:acd}--\Cref{sec:dense}.

\begin{algorithm}[H]
\caption{${\text{\sc ColorMiddle}}$: Colors nodes with degrees in $[\log^7 n, n^{7\delta}]$ {\small (extracted from \cite{hknt_local_d1lc})}}
\label{alg:color_middle_degrees}

Compute an almost-clique decomposition (ACD).

{\sc ColorSparse} (\Cref{alg:color_sparse}).

{\sc ColorDense} (\Cref{alg:color_dense}).
\end{algorithm}



 

\subsection{Computing an almost-clique decomposition}
\label{sec:acd}

First, the authors of \cite{hknt_local_d1lc} compute an \emph{almost-clique decomposition} of the input graph. The almost-clique decomposition is as follows:

\begin{definition}[{$(\mbox{deg}+1)$-ACD}~\cite{AlonA20, hknt_local_d1lc}]
\label{def:acd}
Let $G=(V,E)$ be a graph and $\veps_{ac}, \veps_{sp} \in (0,1)$ be constants. A partition of $V$ into $V_{\text{sparse}} \sqcup V_{\text{uneven}} \sqcup V_{\text{dense}}$, such that $V_{\text{dense}}$ can be partitioned into $C_1, \ldots, C_t$ for some $t$, is said to be an almost-clique decomposition for $G$ if


\begin{enumerate}
\item[(i)] Every $v \in V_{\text{sparse}}$ is $\veps_{sp}  d(v)$-sparse;
\item[(ii)] Every $v \in V_{\text{uneven}}$ is $\veps_{sp} d(v)$-uneven;
\item[(iii)] For every $i \in [t]$ and $v \in C_i$, $d(v)\leq (1+\veps_{ac})\size{C_i}$;
\item[(iv)] For every $i \in [t]$ and $v \in C_i$, $\size{C_i} \leq (1+\veps_{ac})\size{N(v) \cap C_i}.$

\end{enumerate}
\end{definition}

Intuitively:

\begin{itemize}
\item $V_{\text{sparse}}$: A set of (sufficiently) \emph{sparse} nodes. A node is $\varepsilon$-sparse if there are $\varepsilon \cdot d(v)$ many non-edges between neighbors of $v$.
\item $V_{\text{uneven}}$: A set of \emph{uneven} nodes. A node is uneven if many of its neighbors have a much higher degree than $v$. Formally, a node $v$ is $\eta$-uneven if $\sum_{u \in N(v)} \frac{\max(0, d(u) - d(v))}{d(u) + 1} \geq \eta$.

\item $V_{\text{dense}}=C_1\sqcup \dots \sqcup C_t$: Each $C_i$ is an \emph{almost-clique}. Intuitively, almost-cliques are very well connected parts of the graph. Formally, for some $\varepsilon_{ac}$ and for all almost-cliques $C_i$ and all $v \in C_i$, $d(v) \leq (1 + \varepsilon_{ac}) \size{C_i}$, and $\size{C_i} \leq (1 + \varepsilon_{ac}) \size{N(v) \cap C_i}$.

\end{itemize}

After computing an ACD and partitioning the nodes into $V_{\text{sparse}}$, $V_{\text{uneven}}$ and $V_{\text{dense}}$, \textsc{ColorMiddle} (\Cref{alg:color_middle_degrees}) first colors the nodes in $V_{\text{sparse}} \sqcup V_{\text{uneven}}$ using the subroutine \textsc{ColorSparse}, and then colors the nodes in $V_{\text{dense}}=C_1 \sqcup \dots \sqcup C_t$ using the subroutine \textsc{ColorDense}.

\subsection{Coloring sparse nodes}\label{sec:sparse}
Before discussing the coloring of the sparse nodes, we first discuss a subroutine {\sc SlackColor} (\Cref{alg:slack_color}), which is used by Steps 2 and 3 of \Cref{alg:color_middle_degrees} for coloring sparse nodes and dense nodes, respectively.

\textsc{SlackColor} colors all nodes which have slack linear in their degree (and the degree is large enough) in $O(\log^*n)$ rounds. The authors of \cite{hknt_local_d1lc}  point out that similar results were shown in \cite{sw10-distributed-symmetry-breaking}: in this paper we focus on simulating and derandomizing the algorithm {\sc SlackColor} given in \cite{hknt_local_d1lc}.
Note that {\sc SlackColor} takes two parameters $s_{min}$ and $\kappa$ as input, where $s_{min}$ is an integer that lies between $1$ and the minimum amount of slack of any node (that we want to color) and $\kappa$ lies between $1/s_{min}$ and $1$.\footnote{The success probability of \textsc{SlackColor} depends on $s_{min}$ and $\kappa$. Also, \cite{hknt_local_d1lc} uses \textsc{SlackColor} only when $s_{min}=\Omega(\ell)$ and $\kappa$ is a suitable constant, where \mbox{$\ell=\log^{2.1}\Delta$.}} \textsc{SlackColor} requires two subroutines, namely \textsc{TryRandomColor} (\Cref{alg:try_random_color}) and \textsc{MultiTrial} (\Cref{alg:multi_trial}).

\begin{algorithm}[H]
\caption{\textsc{SlackColor}$(s_{min})$, for node $v$, from \cite{hknt_local_d1lc}}
\label{alg:slack_color}


Do ${\text{\sc TryRandomColor}}(v)$ for $O(1)$ rounds.

If $\slack{v} < 2d(v)$ then terminate.


Let $\rho\gets s_{\min}^{1/(1+\kappa)}$. \textit{\small \hfill //$s_{min}$ is globally known such that $1<s_{min}\leq\min_{v}s(v)$ and $\kappa$ is an input parameter such that $1/s_{min}<\kappa \leq 1$.}

\For{$i$ from $0$ to $\log^* \rho$}{
$x_i \gets 2 \uparrow \uparrow i$ \textit{\small \hfill // {$2 \uparrow \uparrow i$ refers to iterated exponentiation, i.e., $2 \uparrow \uparrow 0 = 1$ and $2 \uparrow \uparrow (i + 1) = 2^{2 \uparrow \uparrow i}$}.}


$\text{{\sc MultiTrial}}(x_i)$ $2$ times

If $d(v) > \slack{v}/\min(2^{x_i},\rho^\kappa)$ then terminate.
}

\For{$i$ from $1$ to $\lceil 1/\kappa \rceil$}{

$x_i \gets \rho^{i \cdot \kappa}$.

$\text{{\sc MultiTrial}}(x_i)$ $3$ times.

If $d(v) > \slack{v}/\min(\rho^{(i+1) \cdot \kappa},\rho)$ then terminate.
}

$\text{{\sc MultiTrial}}(\rho)$.


\end{algorithm}
\begin{algorithm}[H]
\caption{{\sc TryRandomColor}(node $v$) from \cite{hknt_local_d1lc}}
\label{alg:try_random_color}
Pick $\psi_v$ u.a.r. from $\Psi(v)$.

Send $\psi_v$ to each $u \in N(v)$, receive the set $T = \{\psi_u : u \in N^+(v)\}$, where $N^+(v)$ is the set of neighbours whose colors ``conflict'' with $v$.

If $\psi_v \not\in T$ then permanently color $v$ with $\psi_v$.

Send and receive permanent colors, and remove the received one from $\Psi(v)$
\end{algorithm}

\begin{algorithm}[H]
\caption{$\text{\sc{MultiTrial}}(x)$ from \cite{hknt_local_d1lc}}
\label{alg:multi_trial}
$v$ picks a set $X_v$ of $x$ random colors in its palette $\Psi(v)$, sends them to neighbours

\If{$\exists \psi \in X_v$ such that $\forall u \in N(v), \psi \not \in X_u$}{

Permanently color $v$ with some such $\psi$ and broadcast to $N(v)$.
}
\end{algorithm}

The first coloring task is to color all nodes which are \emph{sparse} or \emph{uneven} (that are in $V_{\text{sparse}}$ or $V_{\text{uneven}}$). The outline of the subroutine which does this ({\sc ColorSparse}) is in \Cref{alg:color_sparse}.

\begin{algorithm}[H]
\caption{{\sc ColorSparse} from \cite{hknt_local_d1lc}}
\label{alg:color_sparse}
Identify set of nodes $V_{\text{start}} \subset V_{\text{sparse}}$.

{\sc GenerateSlack} in $G\left[(V_{\text{sparse}} \cup V_{\text{uneven}}) \setminus V_{\text{start}}\right]$.

{\sc SlackColor} $V_{\text{start}}$.

{\sc SlackColor} $V_{\text{sparse}} \setminus V_{\text{start}}$ and $V_{\text{uneven}}$.
\end{algorithm}

The challenge here is that all nodes at the start of the algorithm are only guaranteed to have constant slack: in the (easier) $(\Delta + 1)$-coloring problem, nodes of low-degree are guaranteed to have slack linear in their degree. The authors of \cite{hknt_local_d1lc} overcome this by identifying some subset of the nodes in $V_{\text{sparse}}$ for which it is hard to generate slack: they call this set $V_{\text{start}}$. They then generate some slack using a simple primitive ({\sc GenerateSlack}, \Cref{alg:generate_slack}) for nodes in $(V_{\text{sparse}} \cup V_{\text{uneven}}) \setminus V_{\text{start}}$.

\begin{algorithm}[H]
\caption{{\sc GenerateSlack} from \cite{hknt_local_d1lc}}
\label{alg:generate_slack}
$S \gets$ Each node $v$ is sampled into $S$ independently w.p. $1/10$.

For all $v \in S$ in parallel $\text{{\sc TryRandomColor}}(v)$.

\end{algorithm}




They show that nodes in $V_{\text{start}}$ have many neighbours in $(V_{\text{sparse}} \cup V_{\text{uneven}}) \setminus
V_{\text{start}}$, and so nodes in $V_{\text{start}}$ have considerable \emph{temporary slack} (slack obtained from neighbors being colored later). Since $(V_{\text{sparse}} \cup
V_{\text{uneven}}) \setminus V_{\text{start}}$ are exactly the nodes
which do not struggle to obtain slack, they have that all
sparse/uneven nodes have slack linear in their degree, and this is
enough to color them in $O(\log^* n)$ rounds (by the algorithm of
\cite{sw10-distributed-symmetry-breaking} or by {\sc SlackColor}).



\subsection{Coloring dense nodes}\label{sec:dense}

Finally, it remains to color nodes in the almost-cliques $C_1 \sqcup \dots \sqcup C_t$. The algorithm for doing this given in \cite{hknt_local_d1lc} is {\sc ColorDense} (\Cref{alg:color_dense}).

\begin{algorithm}[H]
\caption{{\sc ColorDense} from \cite{hknt_local_d1lc}}
\label{alg:color_dense}

Compute the leader $x_C$ and outliers $O_C$ for each almost-clique $C$. Let $O=\bigcup\limits_{C \in \cC} O_C$, where $\cC$ denotes the set of all almost-cliques.

{\sc GenerateSlack}.

$P_C \leftarrow${\sc PutAside}$(C)$ for each almost-clique $C$ with low slackability. Let $P$ denote the union of all such $P_C$'s.

{\sc SlackColor} $O$.

{\sc SynchColorTrial} $V _{\text{dense}} \setminus P$.

{\sc SlackColor} $V_{\text{dense}} \setminus P $.

For each $C \in \cC$ with low slackability, $x_C$ collects the palettes in $P_C$ and colors the nodes locally.
\end{algorithm}


First, each almost-clique $C$ selects a \emph{leader} $x_C$, i.e., the node with minimum slackability (breaking ties arbitrarily).~\footnote{See Definition~\ref{def:parameters} for the definition of slackability of a node.} The slackability of an almost-clique $C$ is the slackability of the leader $x_C$. $C$ is said to be an almost-clique with low slackability if its slackability is at most $\ell=\log^{2.1} \Delta$; otherwise $C$ has high slackability. For each almost-clique with low slackability, we need to remove some of the nodes to help the coloring process. The details will be discussed later in this section.

The intuition is that the leader has similar palettes to a large fraction of other nodes in the almost-clique, a property which will be useful later. Then the almost-clique is split roughly in half, into \emph{inliers} and \emph{outliers}: inliers are the nodes that are similar to the leader (by some criteria that we will discuss later), and outliers are nodes which are dissimilar to the leader. In particular, since outliers are (slightly less than) half of the almost-clique, they have slack linear in their degree and can be colored in $O(\log^* n)$ rounds by {\sc SlackColor} (if we defer the nodes in the inliers to be colored later).

Next, inliers need to be colored. Since the leader is very similar to the inliers (in particular: it is connected to all inliers and has a similar palette to them), a surprisingly simple process, where the leader proposes one of its colors to each inlier (such that it proposes each color to at most one inlier), suffices. The pseudocode for this process is given in {\sc SynchColorTrial} (\Cref{alg:synch}). After {\sc SynchColorTrial} has completed, there is linear slack at each remaining inlier to apply {\sc SlackColor} and color the remaining inliers in $O(\log^* n)$ rounds.

\begin{algorithm}
\caption{{\sc SynchColorTrial} for almost-clique $C$ \cite{hknt_local_d1lc}}
\label{alg:synch}
{

$x_C$ randomly permutes its palette $\Psi(x_C)$, and sends each neighbor $v \in I_C$ a distinct color $c(v)$.
\textit{\small \hfill // $I_C=C\setminus O_C$ denotes the set of inliers of the almost-clique $C$.}

Send $c(v)$ to $N(v)$, and receive the set $T=\{c(u):u \in N^{+}(v)\}$. \textit{\small \hfill // $N^+(v)$ is the set of neighbours whose colors ``conflict'' with $v$.}

If $c(v) \notin T$, then permanently color $v$ with $c(v)$.

Send and receive permanent colors, and remove the received ones from $\Psi(v)$.
}
\end{algorithm}
\begin{algorithm}
\caption{{\sc PutAside}($C$) from~\cite{hknt_local_d1lc}}
\label{alg:put-aside}
{
Sample each node $v \in I_C$ independently with probability $p_s=\frac{\ell^2}{48 \Delta_C}$, and include it in the set $S_C$, where $\ell=\log ^{2.1} \Delta$.

Let $S$ denote the union of such $S_C$'s.

return $P_C$ as the set of nodes in $S_C$ that do not have a neighbor in $S$.
}
\end{algorithm}
Finally, there is an additional challenge to overcome for some almost-cliques. If the ``slackability'' (intuition: a measure of how easy it is to generate slack) is low, {\sc SynchColorTrial} might not create enough slack for the remaining inliers. The authors of \cite{hknt_local_d1lc} get around this by computing a ``put-aside'' set: a set of inliers which we set aside and color at the end of the algorithm. The put-aside set is computed using {\sc PutAside} (\Cref{alg:put-aside}), and has three important properties: it is of polylogarithmic size (in each almost-clique that needs it); put-aside sets in different almost-cliques have no edges between them; and the inliers not assigned to the put-aside set each have sufficiently many neighbours in the put-aside sets to create sufficient slack for them to be colored ultrafast by {\sc SlackColor}.







\section{Introduction}
\label{sec:intro}

Graph coloring is one of the most fundamental problems in algorithms which has been studied extensively in various settings for several decades. It has been playing an especially prominent role in distributed and parallel computing, not only because of its numerous applications, but also since some variants of coloring problems naturally model typical symmetry breaking problems, as frequently encountered in decentralized systems (see, e.g., \cite{BE13} for an overview of early advances).
Parallel graph coloring has been studied since the 1980s \cite{karloff1985fast}, and nowadays $(\Delta+1)$-coloring and $(2\Delta-1)$-edge-coloring are considered among the most fundamental graph problems in the area (here and throughout the paper, $\Delta$ refers to the maximum degree of the input graph).

In this paper, we study the parallel complexity of a natural generalization of the $(\Delta+1)$-coloring problem\footnote{The $(\Delta+1)$-coloring problem is to color a graph of maximum degree $\Delta$ using $\Delta+1$ colors.}, the problem of \emph{(degree+1)-list coloring} (\DILC). In the \DILC problem, for a given undirected graph $G = (V,E)$, each node has an input palette of acceptable colors of size one more than its degree, and the objective is to find a proper coloring using these palettes. While it is easy to design a simple linear-time (sequential) greedy algorithm for \DILC, the parallel and distributed complexity of \DILC is less well understood. Clearly, the problem is not easier than the $(\Delta+1)$-coloring problem and its variant the $(\Delta + 1)$-list coloring problem%
\footnote{In $(\Delta + 1)$-list coloring each node has a palette of $\Delta+1$ many colors at its disposal (rather than $\deg+1$ many, as in~\DILC).}, but the challenge of dealing with nodes having color palettes of greatly different sizes seems to make the problem significantly more difficult. Still, it has been observed that by using techniques developed in \cite{FHK16,Kuhn20}, one can deterministically reduce \DILC to $(\Delta + 1)$-list coloring with only an $O(\log\Delta)$ multiplicative and $O(\log^*n)$ additive overhead in the running time (number of rounds). However, the logarithmic complexity gap is still significant and until very recently, this gap has been elusive for the most efficient distributed and parallel algorithms for \DILC. The first advance (in the distributed setting) has come only very recently, when Halld{\'o}rsson, Kuhn, Nolin, and Tonoyan \cite{hknt_local_d1lc} presented a randomized $O(\log^3\log n)$-rounds distributed algorithm for \DILC in the \LOCAL distributed model, matching the state-of-the-art complexity for the simpler $(\Delta+1)$-coloring problem due to Chang, Li, and Pettie \cite{CLP20}. In another very recent work, Halld{\'o}rsson, Nolin, and Tonoyan \cite{HNT22} extended the framework and showed that \DILC can be solved
in $O(\log^5\log n)$-rounds in the distributed \CONGEST model, matching the state-of-the-art complexity for the simpler $(\Delta+1)$-coloring problem in \CONGEST by Halld{\'{o}}rsson, Kuhn, Maus, and Tonoyan~\cite{HKMT21}.

In this paper we demonstrate that the \DILC problem can be efficiently solved in the \emph{Massively Parallel Computation} (\MPC) model with sublinear local space, matching the complexity of the state-of-the-art \MPC algorithms for the simpler $(\Delta+1)$-coloring and $(\Delta+1)$-list coloring problems.

\paragraph{MPC model}
The \emph{Massively Parallel Computation} (\MPC) model, introduced over a decade ago by Karloff \etal \cite{KSV10}, is a nowadays standard theoretical model for parallel algorithms. The model has been developed on the basis of its successful modeling of parallel and distributed frameworks such as MapReduce \cite{mapreduce}, Hadoop \cite{hadoop}, Dryad \cite{dryad}, and Spark \cite{spark}, and it shares many similarities to classical models of parallel computation (e.g., PRAM) and distributed models (e.g., \CONGESTEDC). In this paper, we focus on the \emph{low-space/sublinear} MPC regime, in which machines have local space $\lspace = O(n^{\phi})$ for any arbitrary constant $\phi \in (0,1)$, where $n$ is the number of nodes in the graph. This model has attracted a lot of attention recently, see, e.g., \cite{ANOY14,ASSWZ18,BKS17,BHH19,BKM20,BBDFHKU19,CC22,CFGUZ19,CDP20,CDP21,CDPsparse,CLMMOS18,GGKMR18,GKU19,GU19,GSZ11,LMOS20}. Recent works have provided many algorithms for fundamental graph problems such as connectivity, approximate matching, maximal matching, maximal independent set, and $(\Delta+1)$ coloring.

It is known that the $(\Delta+1)$-coloring algorithm due to Chang \etal \cite{CFGUZ19} can be combined with the network decomposition result of \cite{RG20} to obtain a randomized $O(\log\log\log n)$-round \MPC algorithm, which is currently the state-of-the-art bound for $(\Delta+1)$-list-coloring. Recently, Czumaj \etal \cite{CDP21} showed that this result can be derandomized, even for the list-coloring version of the problem. In this paper we show that these bounds can be matched on a sublinear local space \MPC for the more general \DILC problem.

We first show how to combine the \DILC framework for the \LOCAL model due to Halld{\'o}rsson \etal \cite{hknt_local_d1lc} with the techniques developed in earlier works on the \MPC model, to obtain a randomized \MPC algorithm for \DILC working in $O(\log\log\log n)$ rounds, w.h.p.
Then we present a more challenging part (our main contribution) and show that the $O(\log\log\log n)$-rounds randomized \MPC algorithm can be efficiently derandomized in the \MPC model.

\begin{theorem}[\textbf{Main}]
\label{thm:main:deter}
Let $\spacexp \in (0,1)$ be an arbitrary constant. There exists a deterministic algorithm that, for every $n$-node graph $G=(V,E)$, solves the \DILC problem using $O(\log\log\log n)$ rounds, in the low-space \MPC model with local space $\lspace = O(n^{\spacexp})$ and global space $O(m+n^{1+\spacexp})$.
\end{theorem}

Observe that the bound in \Cref{thm:main:deter} matches the state-of-the-art bound for the complexity of the simpler $(\Delta+1)$-coloring problem in the (low-space) \MPC model (see \cite{CFGUZ19} for the randomized bound and \cite{CDP21} for the deterministic bound). Furthermore, the recently developed framework connecting the complexity of \LOCAL and low-space \MPC algorithms (see \cite{GKU19,CDPcompstab}), provides some evidence that our upper bound cannot be asymptotically improved, unless the complexity of the $(\Delta+1)$-coloring problem is
$(\log\log n)^{o(1)}$
in the \LOCAL model. This is because \cite{GKU19,CDPcompstab} show that for a class of component stable algorithms and conditioned on the so-called 1-vs-2-cycles conjecture, no low-space \MPC algorithm can run faster than the logarithm of the complexity of \LOCAL algorithms. (Still, even conditioned on the 1-vs-2-cycles conjecture, it might be conceivable that a non-component stable randomized \MPC low-space algorithm can solve $(\Delta+1)$-coloring in $o(\log\log\log n)$ rounds, and further, we do not have any good enough \LOCAL lower bounds for coloring, and so maybe an
$(\log\log n)^{o(1)}$-rounds
\LOCAL algorithm is possible.)
Finally, notice that Roughgarden \etal \cite{RVW18} showed that proving any super-constant lower bound in the low-space \MPC for any problem in \model{P} would separate $\model{NC}^1$ from \model{P}, making any \emph{unconditional} super-constant (low-space \MPC) lower bound unlikely.

As an application of our framework for \DILC developed in \Cref{thm:main:deter}, we consider the related problem of $\Delta$-coloring graphs of maximum degree $\Delta \ge 3$ that contain no clique of size $\Delta+1$ (cf. Brooks' theorem \cite{Brooks41}). Recently, Fischer, Halld{\'o}rsson, and Maus \cite{FHM23} devised a general approach allowing to reduce in a constant number of rounds (in \LOCAL, \CONGEST, and \CONGESTEDC) the $\Delta$-coloring problem to a constant number of instances of (degree+1)-list coloring, assuming $\Delta = \omega(\log^3 n)$ (for the \CONGESTEDC algorithm $\Delta = \omega(\log^{4+\varepsilon} n)$ is required, for a small constant $\varepsilon$). This was then extended to obtain a randomized $O(\text{poly}(\log\log n))$-rounds \LOCAL algorithm to $\Delta$-color any graph of maximum degree $\Delta \ge 3$ that does not contain a clique of size $\Delta+1$. In this paper we obtain a similar result for the \MPC model.

\begin{theorem}
\label{thm:delta_coloring}
Let $\spacexp \in (0,1)$ be an arbitrary
constant. There exists a randomized algorithm that for any $n$-node graph $G = (V,E)$ with maximum degree $\Delta \ge 3$ that does not contain a $(\Delta+1)$-clique, finds a $\Delta$-coloring of $G$ in $O(\log\log\log n)$ rounds on the low-space \MPC model with local space $\lspace =O(n^{\spacexp})$ and global space $O(m+n^{1+\spacexp})$.
\end{theorem}

In this paper, our main focus is on the proof of \Cref{thm:main:deter} and we defer its application to the proof of \Cref{thm:delta_coloring} to \Cref{sec:Delta-coloring}.










\subsection{Setting and notation}
\label{sec:setting}




For $k \in \mathbb{N}$, $[k]$ denotes the set $\{1,\ldots,k\}$. For $a,b \in \mathbb{N}$, $[a,b]$ denotes the set of integers in $\{a,a+1,\ldots,b\}$. We consider a graph
$G=(V,E)$ with $V$ as the node set and $E$ as the edge set with $|V|=n$ and $|E|=m$.
The set of neighbors of a node $v$ is denoted by $N(v)$ and the degree of a node $v$ is denoted by $d(v)$. For a node $v$, $\Psi(v)$ denotes the list of colors in the color palette of node $v$ and $p(v)$ denotes the size of $\Psi(v)$. The maximum degree of any node in $G$ is denoted by $\Delta$. As we go on coloring the nodes of the graph $G$, the graph will change and the color palettes of the nodes will also change. Often, we denote the current (rather than the input) graph by $G$. For all graphs we consider, we have $p(v)\geq d(v)+1$. For a subset $X \subseteq V$, $G[X]$ denotes the subgraph induced by $X$ and $m(X)$ denotes the number of edges in $G[X]$.

\paragraph{Degree+1 list coloring (D1LC)}


The \textbf{degree+1 list coloring (D1LC) problem} is defined as follows: given a graph $G = (V,E)$ and color palettes $\Psi(u)$ assigned to each node $u \in V$, such that $\size{\Psi(u)} \geq d(u)+1$, the objective is to find a proper coloring of the nodes in $G$ such that each node is assigned a color from its color palette (and, as in any proper coloring, no edge in $G$ is monochromatic).

\paragraph{Massively Parallel Computation model.}
We consider the \emph{Massively Parallel Computation} (\MPC) model, which is a parallel system with some number of machines, each of them having some local space~\lspace.
At the beginning of computation, each machine receives some part of the input, with the constraint that it must fit within its local space. In our case, for the \DILC problem, the input is a set of $n$ nodes, $m$ edges, and $n$ color palettes of total size $O(n+m)$. Hence we will require that the number of machines is $\Omega(\tfrac{n+m}{\lspace})$, for otherwise the input would not fit the system. The computation on an \MPC proceeds in synchronous rounds. In each round, each machine processes its local data and performs an arbitrary local computation on its data without communicating with other machines. At the end of each round, machines can exchange messages. Each message is sent only to a single machine specified by the machine that is sending the message. All messages sent and received by each machine in each round have to fit into the machine's local space. Hence, the total number of messages sent by any machine and received by any machine is bounded by \lspace, and the total amount of communication across the whole \MPC is bounded by \lspace times the number of machines. At the beginning of the next round, each machine can process all messages received in the previous round. When the algorithm terminates, machines collectively output the solution.


Observe that if a single machine can store the entire input, then any problem (like, e.g., \DILC) can be solved in a single round, since no communication is required. In order for our algorithms to be as scalable as possible, normally one wants to consider graph problems in the low-space regime, where local space $\lspace = n^{\spacexp}$ for any given constant $\spacexp \in (0,1)$. (There has been some research considering also the case when $\lspace = \Theta(n)$, or even when $\lspace = n^{1+\spacexp}$ (in which case one wants to study the case that $\lspace \ll m$) but we will not consider such setting in the current paper.) We will require that the number of machines is not significantly more than required,
specifically that it is $\widetilde{O}(n+\frac{m}{\lspace})$ (note that the optimal amount would be $\widetilde{O}(\frac{n+m}{\lspace})$, but our algorithm requires the ability to assign a machine to each node).
A major challenge in the design of \MPC algorithms in the low-space regime is that the local space of each machine is (possibly) not sufficient to store all the edges incident to a single node. This constraint naturally requires an \MPC algorithm to rely on extensive communication between machines, and most of the techniques known are based on some graph sparsification. It is important to note here that even in the low-space regime, the \MPC model is known \cite{GSZ11} to be stronger than the PRAM model, e.g., it is known that sorting\footnote{Here we consider sorting of $N$ objects on an \MPC with local space $N^{\gamma}$ and on $N^{1-\gamma}$ machines, for any constant $\gamma>0$.} (and in fact, many related tasks, like prefix sum computation) can be performed in a constant number of rounds, even deterministically, see \cite{GSZ11}. Observe that with this tool, we can gather nodes' neighborhoods to contiguous blocks of machines, and learn their degrees, in a constant number of rounds, and that we can assume, without loss of generality, that the input can be distributed arbitrarily on the first $\Theta(\tfrac{n+m}{\lspace})$ machines on an \MPC.

\subsection{Technical contribution}

While the \MPC model is notionally stronger than \LOCAL, writing \MPC algorithms which work in an exponentially lower number of rounds than their \LOCAL counterparts is often challenging, due to the limited local space of each machine in the \MPC model. In particular, nodes of high degree often cannot hope to collect their $2$-hop neighborhood in the \MPC model, and this renders many common subroutines in \LOCAL algorithms challenging (for example, in the setting of coloring, computing an almost-clique decomposition).

We overcome this technical difficulty using a deterministic recursive sparsification approach similar to \cite{CDP20,CDP21} where we repeatedly partition an instance of \DILC with maximum degree $\Delta$ into $n^\delta$ \DILC instances, each with maximum degree $\Delta / n^\delta$. Here $\delta \in (0,1)$ is a constant less than $\phi$ to be fixed later and $\phi$ is our local space parameter, i.e., $s=O(n^\phi)$. All but one of these instances are \emph{valid} \DILC instances and so can be solved immediately (using this recursive sparsification if the degree is still too high), and the final instance can only be solved when it is determined which colors are unused in the other instances. In this way, we can reduce the maximum degree of the \DILC instances which we have to solve to an arbitrarily small polynomial in $n$.

For \DILC instances with degrees between $\log^7 n$ and some arbitrarily small polynomial $n^\delta$, we show that a recent algorithm of \cite{hknt_local_d1lc} can be efficiently simulated in the \MPC setting using information only in the $2$-hop neighborhood of each node. Then, we show that the randomized subroutines from the algorithm of \cite{hknt_local_d1lc} can be efficiently derandomized, as they all satisfy a particular set of properties. The derandomization in this paper uses pseudorandom generators (PRGs) combined with the method of conditional expectations. The main difficulty is that PRGs restricted by the local space requirement of low-space \MPC will fail on some non-trivial proportion of nodes even if the base randomized procedure succeeds with high probability. Furthermore, in general this can cause a chain reaction of nodes failing to meet the success requirements of the randomized procedure. To overcome this difficulty, we formalize in \Cref{prng-derandomizable} a collection of properties that allow us to fully derandomize a procedure using the PRG, which we prove in \Cref{lem:fullderand}. This lemma may be of independent interest as a powerful black-box derandomization technique in \MPC.

For instances with degree lower than $\log^7 n$, we can use an existing algorithm of Czumaj \etal \cite{CDP21}.

\paragraph{$\Delta$-coloring.}
The \DILC problem has been studied extensively in distributed and parallel settings largely thanks to its use in the design of algorithms for other coloring problems. For example, \DILC has been frequently used as a subroutine in $(\Delta + 1)$-coloring algorithms (in the post-shattering step). Fischer \etal \cite{FHM23} observed recently that one can solve the $\Delta$-coloring problem using algorithms for \DILC: the approach due to Fischer \etal relies on a reduction from $\Delta$-coloring to a constant number of \DILC instances. We combine our \DILC \MPC algorithm from \Cref{thm:main:deter} with the reduction from \cite{FHM23} to obtain an $O(\log\log\log n)$-rounds algorithm for the $\Delta$-coloring problem (\Cref{thm:delta_coloring}). In particular, we show that the delicate almost-clique decomposition and classification required in the $\Delta$-coloring algorithm of Fischer \etal \cite{FHM23} can be performed in $O(1)$ rounds of sublinear \MPC. While the \CONGEST result of Fischer \etal \cite{FHM23} holds only for $\Delta = \omega(\log^3 n)$, our result does not require this restriction (i.e., holds for all values of $\Delta \geq 3$ provided the graph is $\Delta$-colorable, that is, provided it does not contain a $(\Delta+1)$-clique), exploiting the ability of \MPC to perform ``graph exponentiation'' when the maximum degree is low. (Because of space constraints, the analysis is deferred to \Cref{sec:Delta-coloring}.)

\subsection{Related work}

Our work continues a long line of research studying the parallel and distributed computing of graph coloring problems. For the references to earlier work on distributed coloring algorithms we refer to the monograph by Barenboim and Elkin \cite{BE13} (see also the influential papers by Linial \cite{Linial87,Linial92}). We will discuss here only more recent advances (and final results) for the four most relevant coloring problems, $(\Delta+1)$-coloring, $(\Delta+1)$-list-coloring, \DILC, and $\Delta$-coloring, leaving more detailed discussion and more references to the papers listed below. After extensive research in distributed computing for the $(\Delta+1)$-coloring problem, we understand its complexity for the \LOCAL, \CONGESTEDC, and also for the \MPC model, quite well. For \CONGESTEDC (and also for \MPC with linear memory, $\lspace = O(n)$), we know now how to solve $(\Delta+1)$-coloring in a constant number of rounds, see \cite{chang2018optimal,CDP20}. For the \LOCAL model, after a very long line of research, the current state of the art upper bound for randomized algorithm is by Ghaffari and Kuhn \cite{GK21} (see also \cite{RG20}), which achieves $O(\log^3\log n)$ round complexity w.h.p. There is no deterministic $o(\log n)$-round \LOCAL algorithm known. 

For the low-space \MPC, it is known that the $(\Delta+1)$-coloring algorithm due to Chang \etal \cite{CFGUZ19} can be combined with the network decomposition result of \cite{RG20} to obtain a randomized $O(\log\log\log n)$-round \MPC algorithm, which is currently the state-of-the-art bound for the problem; this result was derandomized by Czumaj \etal \cite{CDP21}. Furthermore, all algorithms mentioned above for $(\Delta+1)$-coloring can be extended to solve also $(\Delta+1)$-list-coloring.

For the \DILC problem, which is a generalization of $(\Delta+1)$-coloring and $(\Delta+1)$-list-coloring, there have not been many comparable bounds until the very recent work of Halld{\'o}rsson \etal \cite{hknt_local_d1lc}. In \cite{hknt_local_d1lc}, by significantly extending the earlier approaches for $(\Delta+1)$-coloring (in particular, to allow to efficiently deal with nodes of various degrees), it is shown that \DILC can be solved in $O(\log^3\log n)$ rounds in \LOCAL, w.h.p.
As a byproduct, the framework of Halld{\'o}rsson \etal \cite{hknt_local_d1lc} can be incorporated into a constant-round \MPC algorithm assuming the local \MPC space is slightly \emph{superlinear}, i.e., $O(n \log^4n)$ \cite[Corollary 2]{hknt_local_d1lc}.
A similar bound has been recently obtained for
the \CONGEST model in \cite{HNT22}, solving \DILC in $O(\log^5\log n)$ \CONGEST rounds, w.h.p.
We make extensive use of the framework laid out by Halld{\'o}rsson \etal \cite{hknt_local_d1lc} in their algorithm for \LOCAL in the design of our \DILC algorithm.

Our work relies also on some sparsification and derandomization techniques developed for parallel and distributed coloring algorithms.
With respect to derandomization,
our techniques are closely related to the recent papers by Czumaj \etal \cite{CDP20,CDP21}. The central approach underlying our analysis of derandomization using a pseudorandom generator has been used extensively before, though in the context of \MPC algorithms it has been used only recently in \cite{CDP21}.

In comparison to the perhaps more famous coloring problems listed above, relatively little has been written about the problem of $\Delta$-coloring in the parallel and distributed setting. In the \LOCAL model, the problem was first discussed in a paper by Panconesi and Srinivasan \cite{local_nature_delta_coloring}, and that algorithm was recently revisited and improved by Ghaffari \etal \cite{DBLP:journals/dc/GhaffariHKM21}.
In distributed models with communication bandwidth restrictions (and so, the setting closer to the model we consider here), the first breakthrough came only recently, in a paper by Fischer, Halld{\'o}rsson, and Maus \cite{FHM23}. They gave a \CONGEST algorithm which gives a $\Delta$-coloring of the graph in $O(\poly \log \log n)$ rounds if $\Delta = \omega(\log^3 n)$, which decreases to $O(\log^* n)$ rounds if $\Delta = \omega(\log^{21} n)$. Their result for large values of $\Delta$ was a substantial improvement over the state-of-the-art in any distributed model ($O(\log n)$ rounds previously being the best known), and combined with the result given by \cite{DBLP:journals/dc/GhaffariHKM21} for sub-polylogarithmic values of $\Delta$, they also achieved a $O(\poly\log\log n)$ round algorithm for $\Delta$-coloring in the \LOCAL model.

We are not aware of any direct prior work on $\Delta$-coloring in any model of parallel computing. An $O(\log n)$ round algorithm for \PRAM follows straightforwardly from the $O(\log n)$-round MIS algorithm of Luby \cite{Luby86}: one can find an MIS and defer coloring those nodes, the remaining nodes all gain at least $1$ slack, and therefore form a $(\Delta+1)$-coloring instance. This algorithm can be transformed into a $O(\log n)$ round \MPC algorithm, using well-known results relating to the simulation of \PRAM algorithms in \MPC. We note that the complexity of our algorithm represents a double-exponential improvement over this.

\hide{\subsection*{Notations}
\begin{itemize}
\item $G(V,E)$-- Graph with $n$ nodes.

\item $d_{G}(v)$--- The degree of $v$ in $G$.

\item $N_{G}(v)$--- The neighborhood of $v$ in $G$.

\item $G[W]$--- The subgraph of $G$ induced by $W \subseteq V$.

\item $O(n^\spacexp)$--- The space bound of each machine \peter{I changed this because the parts from \cite{CDP20} already use $\delta$ for something else, and we'll need to compare $\delta$ and $\eps$ later.}
\end{itemize}
}

\input{parts/preliminaries}

\input{parts/d1lc_randomised}

\input{parts/d1lc_deterministic}



\bibliographystyle{ACM-Reference-Format}
