\title{\bf Decentralized adaptation in interconnected uncertain systems with nonlinear parametrization}

\begin{abstract}
We propose a technique for the design and analysis of decentralized adaptation algorithms in interconnected dynamical systems. Our technique does not require Lyapunov stability of the target dynamics and allows nonlinearly parameterized uncertainties. We show that for the considered class of systems,
conditions for reaching the control goals can be formulated in terms of the nonlinear $L_2$-gains of target dynamics of each interconnected subsystem. Equations for decentralized controllers and corresponding adaptation algorithms are also explicitly provided.

{\it Keywords:} nonlinear parametrization; unstable,
non-equilibrium dynamics; decentralized adaptive control; monotone functions
\end{abstract}

\section*{Notation}

According to the standard convention, $\mathbb{R}$ defines the field of real numbers and $\mathbb{R}_{\geq c}=\{x\in\mathbb{R}|x\geq c\}$,
$\mathbb{R}_{+}=\mathbb{R}_{\geq 0}$; symbol $\mathbb{R}^n$ stands for a linear space $\mathcal{L}(\mathbb{R})$ over the field of reals with
$\mathrm{dim}\{\mathcal{L}(\mathbb{R})\}=n$; $\|\mathbf{x}\|$ denotes the Euclidean norm of $\mathbf{x}\in\mathbb{R}^n$; $\mathcal{C}^k$ denotes the space of functions that are at least $k$ times differentiable;
$\mathcal{K}$ denotes the class of all strictly increasing functions $\kappa: \mathbb{R}_+\rightarrow \mathbb{R}_+$ such that
$\kappa(0)=0$. By ${L}_{p}^n[t_0,T]$, where $T>0$, $p\geq 1$ we denote the space of all functions $\mathbf{f}:\mathbb{R}_+\rightarrow\mathbb{R}^n$
such that
$\|\mathbf{f}\|_{p,[t_0,T]}=\left(\int_{t_0}^T\|\mathbf{f}(\tau)\|^{p}d\tau\right)^{1/p}<\infty$;
$\|\mathbf{f}\|_{p,[t_0,T]}$ denotes the ${L}_{p}^n[t_0,T]$-norm of
$\mathbf{f}(t)$. By ${L}^n_\infty[t_0,T]$ we denote the space of all functions $\mathbf{f}:\mathbb{R}_+\rightarrow\mathbb{R}^n$ such that
$\|\mathbf{f}\|_{\infty,[t_0,T]}={\mathrm{ess}} \sup\{\|\mathbf{f}(t)\|,t \in
[t_0,T]\}<\infty$, and $\|\mathbf{f}\|_{\infty,[t_0,T]}$ stands for the
${L}^n_\infty[t_0,T]$ norm of $\mathbf{f}(t)$.

A function $\mathbf{f}(\mathbf{x}): \mathbb{R}^{n}\rightarrow \mathbb{R}^m$ is said to be locally bounded if for any $\|\mathbf{x}\|<\delta$ there exists a constant $D(\delta)>0$ such that the following inequality holds:
$\|\mathbf{f}(\mathbf{x})\|\leq D(\delta)$. Let $\Gamma$ be an $n\times n$
square matrix, then $\Gamma>0$ denotes a positive definite
(symmetric) matrix, and $\Gamma^{-1}$ is the inverse of $\Gamma$.
By $\Gamma\geq 0$ we denote a positive semi-definite matrix,
and $\|\mathbf{x}\|_{\Gamma}^2$ denotes the quadratic form
$\mathbf{x}^{T}\Gamma\mathbf{x}$, $\mathbf{x}\in\mathbb{R}^n$. The notation $|\cdot|$
stands for the modulus of a scalar. The solution of a system of differential equations $\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},t,{\boldsymbol{\theta}},\mathbf{u}), \
\mathbf{x}(t_0)=\mathbf{x}_0$, $\mathbf{u}:\mathbb{R}_+\rightarrow\mathbb{R}^m$,
${\boldsymbol{\theta}}\in\mathbb{R}^d$ for $t\geq t_0$ will be denoted as
$\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},\mathbf{u})$, or simply as $\mathbf{x}(t)$ if it is clear from the context what the values of $\mathbf{x}_0,{\boldsymbol{\theta}}$
are and how the function $\mathbf{u}(t)$ is defined.

Let $\mathbf{u}:\mathbb{R}^n\times\mathbb{R}^d\times\mathbb{R}_+\rightarrow\mathbb{R}^m$ be a function of state $\mathbf{x}$, parameters $\hat{{\boldsymbol{\theta}}}$, and time
$t$. Let in addition both $\mathbf{x}$ and $\hat{{\boldsymbol{\theta}}}$ be functions of $t$. Then in case the arguments of $\mathbf{u}$ are clearly defined by the context, we will simply write $\mathbf{u}(t)$ instead of
$\mathbf{u}(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$.

The (forward complete) system
$\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},t,{\boldsymbol{\theta}},\mathbf{u}(t))$, is said to have an
$L_{p}^m [t_0,T]\mapsto L_{q}^n[t_0,T]$ gain ($T\geq t_0$,
$p,q\in\mathbb{R}_{\geq 1}\cup\{\infty\}$) with respect to its input
$\mathbf{u}(t)$ if and only if $\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},\mathbf{u}(t))\in L_{q}^n [t_0,T]$ for any $\mathbf{u}(t)\in L_{p}^m [t_0,T]$ and there exists a function
$\gamma_{q,p}:\mathbb{R}^n\times\mathbb{R}^d\times\mathbb{R}_+\rightarrow\mathbb{R}_+$
such that the following inequality holds:
$\|\mathbf{x}(t)\|_{q,[t_0,T]}\leq
\gamma_{q,p}(\mathbf{x}_0,{\boldsymbol{\theta}},\|\mathbf{u}(t)\|_{p,[t_0,T]})$. The function $\gamma_{q,p}(\mathbf{x}_0,{\boldsymbol{\theta}},\|\mathbf{u}(t)\|_{p,[t_0,T]})$
is assumed to be non-decreasing in $\|\mathbf{u}(t)\|_{p,[t_0,T]}$, and locally bounded in its arguments.

For notational convenience when dealing with vector fields and partial derivatives we will use the following extended notion of the Lie derivative of a function. Let $\mathbf{x}\in\mathbb{R}^n$ and assume
$\mathbf{x}$ can be partitioned as follows $\mathbf{x}=\mathbf{x}_1\oplus\mathbf{x}_2$,
where $\mathbf{x}_1\in\mathbb{R}^q$, $\mathbf{x}_1=(x_{11},\dots,x_{1q})^T$,
$\mathbf{x}_2\in\mathbb{R}^p$, $\mathbf{x}_2=(x_{21},\dots,x_{2p})^T$, $q+p=n$, and
$\oplus$ denotes the concatenation of two vectors. Define
$\mathbf{f}:\mathbb{R}^{n}\rightarrow\mathbb{R}^n$ such that
$\mathbf{f}(\mathbf{x})=\mathbf{f}_1(\mathbf{x})\oplus\mathbf{f}_2(\mathbf{x})$, where
$\mathbf{f}_1:\mathbb{R}^n\rightarrow\mathbb{R}^q$,
$\mathbf{f}_1(\cdot)=(f_{11}(\cdot),\dots,f_{1q}(\cdot))^T$,
$\mathbf{f}_2:\mathbb{R}^n\rightarrow\mathbb{R}^p$,
$\mathbf{f}_2(\cdot)=(f_{21}(\cdot),\dots,f_{2p}(\cdot))^T$. Then
$L_{\mathbf{f}_i(\mathbf{x})}\psi(\mathbf{x},t)$, $i\in\{1,2\}$ denotes the Lie derivative of the function $\psi(\mathbf{x},t)$ with respect to the vector field $\mathbf{f}_i(\mathbf{x},{\boldsymbol{\theta}})$:
$L_{\mathbf{f}_i(\mathbf{x})}\psi(\mathbf{x},t)=\sum_{j=1}^{\dim{\mathbf{x}_i}}\frac{{\partial}
\psi(\mathbf{x},t) }{{\partial} x_{ij}}f_{ij}(\mathbf{x},{\boldsymbol{\theta}})$.

\section{Introduction}

We consider the problem how to control the behavior of complex dynamical systems composed of interconnected lower-dimensional subsystems. Centralized control of these systems is practically inefficient because of high demands for computational power,
measurements and prohibitive communication cost. On the other hand, standard decentralized solutions often face severe limitations due to the deficiency of information about the interconnected subsystems. In addition, the nature of their interconnections may vary depending on conditions in the environment. In order to address these problems in their most general setup, decentralized adaptive control is needed.

Currently there is a large literature on decentralized adaptive control which contains successful solutions to problems of adaptive stabilization \cite{Gavel_1989,Jain_1997}, tracking
\cite{Ioannou86,Jain_1997,Shi_1992,Passino96}, and output regulation \cite{Jiang_2000,Huang_2003} of linear and nonlinear systems. In most of these cases the problem of decentralized control is solved within the conventional framework of adaptive stabilization/tracking/regulation by a family of linearly parameterized controllers. While these results may be successfully implemented in a large variety of technical and artificial systems, there is room for further improvement, in particular
when the target dynamics of the systems is not stable in the Lyapunov sense but intermittent, meta-stable, or multi-stable
\cite{Arecchi_2004,Raffone_2003,Tsuda_2004} or when the uncertainties are nonlinearly parameterized
\cite{Armstrong_1993,Boskovic_1995,Canudas_1999,Kitching_2000},
and no domination of the uncertainties by feedback is allowed.

In the present article we address these issues at once for a class of nonlinear dynamical systems. Our contribution is that we provide conditions ensuring forward-completeness, boundedness and asymptotic reaching of the goal for a pair of interconnected systems with uncertain coupling and parameters. Our method does not require availability of a Lyapunov function for the desired motions in each subsystem, nor linear parametrization of the controllers. Our results can straightforwardly be extended to interconnection of arbitrary many (but still, a finite number of)
subsystems. Explicit equations for corresponding decentralized adaptive controllers are also provided.

The paper is organized as follows. In Section 2 we provide a formal statement of the problem, Section 3 contains necessary preliminaries and auxiliary results. In Section 4 we present the main results of our current contribution, and in Section 5 we provide concluding remarks to our approach.

\section{Problem Formulation}

Let us consider two interconnected systems $\mathcal{S}_x$ and
$\mathcal{S}_y$:
\begin{eqnarray}
&\mathcal{S}_x: & \
\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}}_x)+\gamma_y(\mathbf{y},t)+
\mathbf{g}(\mathbf{x})u_x \label{eq:system:s1} \\
&\mathcal{S}_y: & \
\dot{\mathbf{y}}=\mathbf{q}(\mathbf{y},{\boldsymbol{\theta}}_y)+\gamma_x(\mathbf{x},t)+\mathbf{z}(\mathbf{y})u_y\label{eq:system:s2}
\end{eqnarray}
where $\mathbf{x}\in\mathbb{R}^{n_x}$, $\mathbf{y}\in\mathbb{R}^{n_y}$ are the state vectors of systems $\mathcal{S}_x$ and $\mathcal{S}_y$, vectors
${\boldsymbol{\theta}}_x\in\mathbb{R}^{n_{\theta_x}}$,
${\boldsymbol{\theta}}_y\in\mathbb{R}^{n_{\theta_y}}$ are unknown parameters,
functions
$\mathbf{f}:\mathbb{R}^{n_x}\times\mathbb{R}^{n_{\theta_x}}\rightarrow\mathbb{R}^{n_x}$,
$\mathbf{q}:\mathbb{R}^{n_y}\times\mathbb{R}^{n_{\theta_y}}\rightarrow\mathbb{R}^{n_y}$,
$\mathbf{g}:\mathbb{R}^{n_x}\rightarrow\mathbb{R}^{n_x}$,
$\mathbf{z}:\mathbb{R}^{n_y}\rightarrow\mathbb{R}^{n_y}$ are continuous and locally bounded. Functions
$\gamma_y:\mathbb{R}^{n_y}\times\mathbb{R}_+\rightarrow\mathbb{R}^{n_x}$,
$\gamma_x:\mathbb{R}^{n_x}\times\mathbb{R}_+\rightarrow\mathbb{R}^{n_y}$, stand for nonlinear, non-stationary and, in general, unknown couplings between systems $\mathcal{S}_x$ and $\mathcal{S}_y$, and
$u_x\in\mathbb{R}$, $u_y\in\mathbb{R}$ are the control inputs.

In the present paper we are interested in the following problem

\begin{problem}\label{problem:decentralized}\normalfont Let $\psi_x:\mathbb{R}^{n_x}\times\mathbb{R}_+\rightarrow\mathbb{R}$,
$\psi_y:\mathbb{R}^{n_y}\times\mathbb{R}_+\rightarrow\mathbb{R}$ be the goal functions for systems $\mathcal{S}_x$, $\mathcal{S}_y$
respectively. In other words, for some values
$\varepsilon_x\in\mathbb{R}_{+}$, $\varepsilon_y\in\mathbb{R}_+$ and time instant $t^\ast\in\mathbb{R}_+$, inequalities
\begin{equation}\label{eq:goal_functionals}
\|\psi_x(\mathbf{x}(t),t)\|_{\infty,[t^\ast,\infty]}\leq\varepsilon_x, \
\|\psi_y(\mathbf{y}(t),t)\|_{\infty,[t^\ast,\infty]}\leq\varepsilon_y
\end{equation}
specify the desired state of interconnection (\ref{eq:system:s1}),
(\ref{eq:system:s2}). Derive functions $u_x(\mathbf{x},t)$, $u_y(\mathbf{y},t)$
such that for all ${\boldsymbol{\theta}}_x\in\mathbb{R}^{n_{\theta_x}}$,
${\boldsymbol{\theta}}_y\in\mathbb{R}^{n_{\theta_y}}$

1) interconnection (\ref{eq:system:s1}), (\ref{eq:system:s2}) is forward-complete;

2) the trajectories $\mathbf{x}(t)$, $\mathbf{y}(t)$ are bounded;

3) for given values of $\varepsilon_x$, $\varepsilon_y$, some
$t^\ast\in\mathbb{R}_+$ exists such that inequalities
(\ref{eq:goal_functionals}) are satisfied or, possibly, both functions $\psi_x(\mathbf{x}(t),t)$, $\psi_y(\mathbf{y}(t),t)$ converge to zero as $t\rightarrow\infty$.

Function $u_x(\cdot)$ should not depend explicitly on $\mathbf{y}$ and,
symmetrically, function $u_y(\cdot)$ should not depend explicitly on $\mathbf{x}$. The general structure of the desired configuration of the control scheme is provided in Figure~\ref{fig:decentralized:singularity}.
\end{problem}

\begin{figure}
\begin{center}
\includegraphics[width=110pt]{decentralized.eps}
\end{center}
\begin{center}
\caption{General structure of interconnection}\label{fig:decentralized:singularity}
\end{center}
\end{figure}

In the next sections we provide sufficient conditions, ensuring solvability of Problem \ref{problem:decentralized} and we also explicitly derive functions $u_x(\mathbf{x},t)$ and $u_y(\mathbf{y},t)$ which satisfy requirements 1) -- 3) of Problem
\ref{problem:decentralized}. We start with the introduction of a new class of adaptive control schemes and continue by providing the input-output characterizations of the controlled systems.
These results are given in Section \ref{sec:preliminary}. Then,
using these characterizations, in Section \ref{sec:main} we provide the main results of our study.

\section{Assumptions and properties of the decoupled systems}\label{sec:preliminary}

Let the following system be given:
\begin{equation}\label{system1}
\begin{split}
\dot{\mathbf{x}}_1=&\mathbf{f}_1(\mathbf{x})+\mathbf{g}_1(\mathbf{x})u, \\
\dot{\mathbf{x}}_2=&\mathbf{f}_2(\mathbf{x},{\boldsymbol{\theta}})+\mathbf{g}_2(\mathbf{x})u,
\end{split}
\end{equation}
where
\[
\mathbf{x}_1=(x_{11},\dots,x_{1 q})^T\in \mathbb{R}^q; \
\mathbf{x}_2=(x_{21},\dots,x_{2 p})^T\in \mathbb{R}^p;
\]
\[
\mathbf{x}=(x_{11},\dots,x_{1 q},x_{21},\dots,x_{2 p})^T\in \mathbb{R}^{n}
\]
${\boldsymbol{\theta}}\in \Omega_\theta\subset \mathbb{R}^d$ is a vector of unknown parameters, and $\Omega_\theta$ is a closed bounded subset of
$\mathbb{R}^d$; $u\in\mathbb{R}$ is the control input, and functions
$\mathbf{f}_1:\mathbb{R}^{n}\rightarrow \mathbb{R}^{q}$,
$\mathbf{f}_2:\mathbb{R}^{n}\times\mathbb{R}^d\rightarrow \mathbb{R}^{p}$,
$\mathbf{g}_1:\mathbb{R}^{n}\rightarrow \mathbb{R}^q$,
$\mathbf{g}_2:\mathbb{R}^{n}\rightarrow\mathbb{R}^{p}$ are continuous and locally bounded. The vector $\mathbf{x}\in\mathbb{R}^n$ is the state vector, and vectors $\mathbf{x}_1$, $\mathbf{x}_2$ are referred to as {\it uncertainty-independent} and {\it uncertainty-dependent} partition of $\mathbf{x}$, respectively. For the sake of compactness we will also use the following description of (\ref{system1}):
\begin{equation}\label{system}
\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})+\mathbf{g}(\mathbf{x})u,
\end{equation}
where
\[
\mathbf{g}(\mathbf{x})=(g_{11}(\mathbf{x}),\dots,g_{1q}(\mathbf{x}),g_{21}(\mathbf{x}),\dots,g_{2 p}(\mathbf{x}))^{T},
\]
\[
\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})=(f_{11}(\mathbf{x}),\dots,f_{1q}(\mathbf{x}),f_{21}(\mathbf{x},{\boldsymbol{\theta}}),\dots,f_{2 p}(\mathbf{x},{\boldsymbol{\theta}}))^{T}.
\]

As a measure of closeness of trajectories $\mathbf{x}(t)$ to the desired state we introduce the error or goal function $\psi:\mathbb{R}^n\times
\mathbb{R}_+\rightarrow \mathbb{R}, \ \psi\in \mathcal{C}^1$.
We also suppose that the chosen function $\psi(\mathbf{x},t)$
satisfies the following:
\begin{assume}[Target operator]\label{assume:psi} For the given function $\psi(\mathbf{x},t)\in \mathcal{C}^1$ the following property holds:
\begin{equation}\label{eq:assume_psi}
\|\mathbf{x}(t)\|_{\infty,[t_0,T]}\leq
\tilde{\gamma}\left(\mathbf{x}_0,{\boldsymbol{\theta}},\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,T]}\right)
\end{equation}
where
$\tilde{\gamma}\left(\mathbf{x}_0,{\boldsymbol{\theta}},\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,T]}\right)$
is a locally bounded and non-negative function of its arguments.
\end{assume}
Assumption \ref{assume:psi} can be interpreted as a sort of {\it unboundedness observability} property \cite{Jiang_1994} of system
(\ref{system1}) with respect to the ``output'' function
$\psi(\mathbf{x},t)$. It can also be viewed as a {\it bounded input -
bounded state} assumption for system (\ref{system1}) along the constraint
$\psi(\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},u(\mathbf{x}(t),t)),t)=\upsilon(t)$,
where the signal $\upsilon(t)$ serves as a new input. If, however,
boundedness of the state is not explicitly required (i.e. it is guaranteed by additional control or follows from the physical properties of the system itself), Assumption \ref{assume:psi} can be removed from the statements of our results.

Let us specify a class of control inputs $u$ which can ensure boundedness of $\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},u)$ for every
${\boldsymbol{\theta}}\in \Omega_\theta$ and $\mathbf{x}_0\in\mathbb{R}^n$. According to
(\ref{eq:assume_psi}), boundedness of
$\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},u)$ is ensured if we find a control input $u$ such that $\psi(\mathbf{x}(t),t)\in L_\infty^1[t_0,\infty]$.
For this objective consider the dynamics of system (\ref{system})
with respect to $\psi(\mathbf{x},t)$:
\begin{equation}\label{dpsi}
\dot{\psi}=L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})}\psi(\mathbf{x},t)+L_{\mathbf{g}(\mathbf{x})}\psi(\mathbf{x},t)u+\frac{{\partial}
\psi(\mathbf{x},t)}{{\partial} t}.
\end{equation}
Assuming that the inverse
$\left(L_{\mathbf{g}(\mathbf{x})}\psi(\mathbf{x},t)\right)^{-1}$ exists everywhere,
we may choose the control input $u$ in the following class of functions:
\begin{equation}\label{control}
\begin{split}
u(\mathbf{x},\hat{\boldsymbol{\theta}},{\boldsymbol{\omega}},t)&=\frac{1}{L_{\mathbf{g}(\mathbf{x})}\psi(\mathbf{x},t)}\left(-L_{\mathbf{f}(\mathbf{x},\hat{{\boldsymbol{\theta}}})}\psi(\mathbf{x},t)-\varphi(\psi,{\boldsymbol{\omega}},t)-\frac{{\partial}\psi(\mathbf{x},t)}{{\partial} t}\right) \\
& \ \varphi: \ \mathbb{R}\times\mathbb{R}^w\times\mathbb{R}_+\rightarrow\mathbb{R}
\end{split}
\end{equation}
where ${\boldsymbol{\omega}}\in\Omega_\omega\subset\mathbb{R}^w$ is a vector of
{\it known} parameters of the function
$\varphi(\psi,{\boldsymbol{\omega}},t)$. Denoting
$L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})}\psi(\mathbf{x},t)=f(\mathbf{x},{\boldsymbol{\theta}},t)$ and taking into account (\ref{control}) we may rewrite equation
(\ref{dpsi}) in the following manner:
\begin{equation}\label{error_model}
{\dot\psi}=f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)
\end{equation}

For the purpose of the present article, instead of
(\ref{error_model}) it is worthwhile to consider the extended equation:
\begin{equation}\label{error_model_d}
{\dot\psi}=f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)+\varepsilon(t),
\end{equation}
where, if not stated otherwise, the function
$\varepsilon:\mathbb{R}_+\rightarrow\mathbb{R}$, $\varepsilon\in L_{2}^1
[t_0,\infty]\cap C^0$. One of the immediate advantages of equation
(\ref{error_model_d}) in comparison with (\ref{error_model}) is that it allows us to take the presence of coupling between interconnected systems into consideration.

Let us now specify the desired properties of the function
$\varphi(\psi,{\boldsymbol{\omega}},t)$ in (\ref{control}),
(\ref{error_model_d}). The majority of known algorithms for parameter estimation and adaptive control
\cite{Kokotovich95,Fradkov99,Narendra89,Sastry89} assume global
(Lyapunov) stability of system
(\ref{error_model_d}) for ${\boldsymbol{\theta}}\equiv\hat{{\boldsymbol{\theta}}}$. In our study, however, we refrain from this standard, restrictive requirement. Instead we propose that finite energy of the signal
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$, defined for example by its $L_{2}^1[t_0,\infty]$ norm with respect to the variable $t$, results in finite deviation from the target set given by the equality $\psi(\mathbf{x},t)=0$. Formally this requirement is introduced in Assumption \ref{assume:gain}:
\begin{assume}[Target dynamics operator]\label{assume:gain} Consider the following system:
\begin{equation}\label{eq:target_dynamics}
{\dot\psi}=-\varphi(\psi,{\boldsymbol{\omega}},t)+\zeta(t),
\end{equation}
where $\zeta:\mathbb{R}_+\rightarrow\mathbb{R}$ and
$\varphi(\psi,{\boldsymbol{\omega}},t)$ is defined in (\ref{error_model_d}).
Then for every ${\boldsymbol{\omega}}\in\Omega_\omega$ system
(\ref{eq:target_dynamics}) has $L_{2}^1 [t_0,\infty]\mapsto L_\infty^1[t_0,\infty]$ gain with respect to input $\zeta(t)$. In other words, there exists a function $\gamma_{\infty,2}$ such that
\begin{equation}\label{eq:gain_psi_L2}
\|\psi(t)\|_{\infty,[t_0,T]}\leq
\gamma_{\infty,2}(\psi_0,{\boldsymbol{\omega}},\|\zeta(t)\|_{2,[t_0,T]}), \ \
\forall \ \zeta(t)\in L_{2}^1[t_0,T]
\end{equation}
\end{assume}
In contrast to conventional approaches, Assumption
\ref{assume:gain} does not require global {\it asymptotic stability} of the origin of the unperturbed (i.e.\ for $\zeta(t)=0$)
system (\ref{eq:target_dynamics}). When the stability of the target dynamics ${\dot\psi}=-\varphi(\psi,{\boldsymbol{\omega}},t)$ is known a-priori, one of the benefits of Assumption \ref{assume:gain} is that there is no need to know a {\it particular Lyapunov function}
of the unperturbed system.

So far we have introduced basic assumptions on system
(\ref{system1}) and the class of feedback considered in this article. Let us now specify the class of functions
$f(\mathbf{x},{\boldsymbol{\theta}},t)$ in (\ref{error_model_d}). Since general parametrization of function $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is methodologically difficult to deal with, but solutions provided for nonlinearities with convenient linear re-parametrization often yield physically implausible models and large number of unknown parameters, we have opted for a new class of parameterizations.
As a candidate for such a parametrization we suggest nonlinear functions that satisfy the following assumption:
\begin{assume}[Monotonicity and Growth Rate in Parameters]\label{assume:alpha}For the given function
$f(\mathbf{x},{\boldsymbol{\theta}},t)$ in (\ref{error_model_d}) there exists function $\boldsymbol{\alpha}(\mathbf{x},t): \mathbb{R}^{n}\times \mathbb{R}_+\rightarrow
\mathbb{R}^d, \ \boldsymbol{\alpha}(\mathbf{x},t)\in \mathcal{C}^1$ and positive constant $D>0$ such that
\begin{equation}\label{eq:assume_alpha}
(f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t))(\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}}))\geq0
\end{equation}
\begin{equation}\label{eq:assume_gamma}
|f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|\leq D
|\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})|
\end{equation}
\end{assume}
This set of conditions naturally extends from systems that are linear in parameters to those with nonlinear parametrization.
Examples and models of physical and artificial systems which satisfy Assumption \ref{assume:alpha} (at least for bounded
${\boldsymbol{\theta}},\hat{{\boldsymbol{\theta}}}\in \Omega_\theta$) can be found in the following references
\cite{Armstrong_1993,Boskovic_1995,Canudas_1999,Abbott_2001,Kitching_2000}.
Assumption \ref{assume:alpha} bounds the growth rate of the difference $|f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)|$ by the functional
$D|\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})|$.
In addition, it might also be useful to have an estimate of
$|f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)|$ from below, as specified in Assumption \ref{assume:alpha_upper}:
\begin{assume}\label{assume:alpha_upper} For the given function
$f(\mathbf{x},{\boldsymbol{\theta}},t)$ in (\ref{error_model_d}) and function
$\boldsymbol{\alpha}(\mathbf{x},t)$, satisfying Assumption \ref{assume:alpha},
there exists a positive constant $D_1>0$ such that
\begin{equation}\label{eq:assume_alpha_upper}
|f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|\geq D_1
|\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})|
\end{equation}
\end{assume}

\noindent In problems of adaptation, parameter estimation, and optimization, the effectiveness of the algorithms often depends on how
``good'' the nonlinearity $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is, and how predictable the system's behavior is. As measures of goodness and predictability, such substitutes as smoothness and boundedness are usually considered. In our study, we distinguish several such specific properties of the functions $f(\mathbf{x},{\boldsymbol{\theta}},t)$
and $\varphi(\psi,{\boldsymbol{\omega}},t)$. These properties are provided below.

\begin{hyp}\label{hyp:locally_bound_uniform_f} The function $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is locally bounded with respect to $\mathbf{x}$, ${{\boldsymbol{\theta}}}$ uniformly in $t$.
\end{hyp}

\begin{hyp}\label{hyp:locally_bound_uniform_df} The function $f(\mathbf{x},{\boldsymbol{\theta}},t)\in \mathcal{C}^1$, and $ {\partial}
{f(\mathbf{x},{\boldsymbol{\theta}},t)}/{{\partial} t}$ is locally bounded with respect to
$\mathbf{x}$, ${{\boldsymbol{\theta}}}$ uniformly in $t$.
\end{hyp}

\begin{hyp}\label{hyp:locally_bound_uniform_phi} The function $\varphi(\psi,{\boldsymbol{\omega}},t)$ is locally bounded in $\psi$,
${\boldsymbol{\omega}}$ uniformly in $t$.
\end{hyp}

Let us show that under an additional structural requirement, which relates properties of the function $\boldsymbol{\alpha}(\mathbf{x},t)$ and vector-field
$\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})=\mathbf{f}_1(\mathbf{x},{\boldsymbol{\theta}})\oplus\mathbf{f}_2(\mathbf{x},{\boldsymbol{\theta}})$
in (\ref{system1}), (\ref{system}), there exist adaptive algorithms ensuring that the following desired property holds:
\begin{equation}\label{eq:desired_prop}
\mathbf{x}(t)\in L_\infty^n[t_0,\infty]; \
f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)\in L_{2}^1[t_0,\infty]
\end{equation}

Consider the following adaptation algorithms:
\begin{equation}\label{fin_forms_ours_tr1}
\begin{split}
\hat{{\boldsymbol{\theta}}}(\mathbf{x},t)&=\Gamma(\hat{{\boldsymbol{\theta}}}_P(\mathbf{x},t)+\hat{{\boldsymbol{\theta}}}_I(t));
\ \Gamma\in\mathbb{R}^{d\times d}, \ \Gamma>0
\\ \hat{{\boldsymbol{\theta}}}_P(\mathbf{x},t)&=
\psi(\mathbf{x},t)\boldsymbol{\alpha}(\mathbf{x},t)-\Psi(\mathbf{x},t) \\
\dot{\hat{{\boldsymbol{\theta}}}}_I&=\varphi(\psi(\mathbf{x},t),{\boldsymbol{\omega}},t)\boldsymbol{\alpha}(\mathbf{x},t)+\mathcal{R}(\mathbf{x},\hat{{\boldsymbol{\theta}}},u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t),t),
\end{split}
\end{equation}
where the function
$\mathcal{R}(\mathbf{x},\hat{{\boldsymbol{\theta}}},u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t),t):\mathbb{R}^n\times\mathbb{R}^d\times\mathbb{R}\times\mathbb{R}_+\rightarrow\mathbb{R}^d$
in (\ref{fin_forms_ours_tr1}) is given as follows:
\begin{equation}\label{fin_forms_ours_tr11}
\begin{split}
&\mathcal{R}(\mathbf{x},\hat{{\boldsymbol{\theta}}},u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t),t)={{\partial}
\Psi(\mathbf{x},t)}/{{\partial} t}-\psi(\mathbf{x},t)({{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}/{{\partial} t}+L_{\mathbf{f}_1}\boldsymbol{\alpha}(\mathbf{x},t))\\
& + L_{\mathbf{f}_1}
\Psi(\mathbf{x},t)-(\psi(\mathbf{x},t)L_{\mathbf{g}_1}\boldsymbol{\alpha}(\mathbf{x},t)-L_{\mathbf{g}_1}
\Psi(\mathbf{x},t))u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)
\end{split}
\end{equation}
and function
$\Psi(\mathbf{x},t):\mathbb{R}^{n}\times\mathbb{R}_+\rightarrow\mathbb{R}^d$,
$\Psi(\mathbf{x},t)\in \mathcal{C}^1$ satisfies Assumption
\ref{assume:explicit_realizability}.
\begin{assume}\label{assume:explicit_realizability} There exists a function $\Psi(\mathbf{x},t)$ such that
\begin{equation}\label{eq:assume_explicit}
\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}-\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_2}=0
\end{equation}
\end{assume}
Additional restrictions imposed by this assumption will be discussed in some detail after we summarize the properties of system (\ref{system1}), (\ref{control}),
(\ref{fin_forms_ours_tr1}), (\ref{fin_forms_ours_tr11}) in the following theorem.

\begin{theorem}[Properties of the decoupled systems]\label{stability_theorem}
Let system (\ref{system1}), (\ref{error_model_d}),
(\ref{fin_forms_ours_tr1}), (\ref{fin_forms_ours_tr11}) be given and Assumptions \ref{assume:alpha}, \ref{assume:alpha_upper},
\ref{assume:explicit_realizability} be satisfied. Then the following properties hold

P1) Let for the given initial conditions $\mathbf{x}(t_0)$,
$\hat{{\boldsymbol{\theta}}}_I(t_0)$ and parameters vector ${\boldsymbol{\theta}}$,
interval $[t_0,T^\ast]$ be the (maximal) time-interval of existence of solutions of the closed loop system (\ref{system1}),
(\ref{error_model_d}), (\ref{fin_forms_ours_tr1}),
(\ref{fin_forms_ours_tr11}). Then
\begin{equation}\label{eq:f_diff_L2}
\|f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)\|_{2,[t_0,T^\ast]}\leq D_f({\boldsymbol{\theta}},t_0,\Gamma,\|\varepsilon(t)\|_{2,[t_0,T^\ast]});
\end{equation}
\[
D_f({\boldsymbol{\theta}},t_0,\Gamma,\|\varepsilon(t)\|_{2,[t_0,T^\ast]})=\left(\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}\right)^{0.5}
+ \frac{D}{D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}
\]
\[
\|{\boldsymbol{\theta}}-\hat{\boldsymbol{\theta}}(t)\|^{2}_{\Gamma^{-1}}\leq
\|\hat{{\boldsymbol{\theta}}}(t_0)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}+\frac{D}{2 D_1^2}\|\varepsilon(t)\|^{2}_{2,[t_0,T^\ast]}
\]

\noindent In addition, if Assumptions \ref{assume:psi} and
\ref{assume:gain} are satisfied then

P2) $\psi(\mathbf{x}(t),t)\in L_\infty^1[t_0,\infty]$, $\mathbf{x}(t)\in L_{\infty}^n[t_0,\infty]$ and
\begin{equation}\label{eq:psi_gain}
\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,\infty]}\leq
\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},\mathcal{D}\right)
\end{equation}
\[
\mathcal{D}=D_f({\boldsymbol{\theta}},t_0,\Gamma,\|\varepsilon(t)\|_{2,[t_0,\infty]})+\|\varepsilon(t)\|_{2,[t_0,\infty]}
\]

P3) if properties H\ref{hyp:locally_bound_uniform_f},
H\ref{hyp:locally_bound_uniform_phi} hold, and system
(\ref{eq:target_dynamics}) has $L_{2}^1 [t_0,\infty]\mapsto L_{p}^1 [t_0,\infty]$, $p>1$ gain with respect to input $\zeta(t)$
and output $\psi$ then
\begin{equation}\label{eq:convergence_psi_theorem}
\varepsilon(t)\in L_{2}^1 [t_0,\infty]\cap L_{\infty}^1[t_0,\infty]\Rightarrow
\lim_{t\rightarrow\infty}\psi(\mathbf{x}(t),t)=0
\end{equation}

If, in addition, property H\ref{hyp:locally_bound_uniform_df}
holds, and the functions $\boldsymbol{\alpha}(\mathbf{x},t)$, ${\partial}
\psi(\mathbf{x},t)/{\partial} t$ are locally bounded with respect to $\mathbf{x}$
uniformly in $t$, then

P4) the following holds
\begin{equation}\label{eq:convergence_f_theorem}
\lim_{t\rightarrow\infty}f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)=0
\end{equation}

\end{theorem}
The proofs of Theorem \ref{stability_theorem} and of the subsequent results are given in Section 6.

Let us briefly comment on Assumption
\ref{assume:explicit_realizability}.
Let $\boldsymbol{\alpha}(\mathbf{x},t)\in
\mathcal{C}^2$,
$\boldsymbol{\alpha}(\mathbf{x},t)=\mathrm{col}(\alpha_1(\mathbf{x},t),\dots,\alpha_d(\mathbf{x},t))$,
then necessary and sufficient conditions for existence of the function $\Psi(\mathbf{x},t)$ follow from the Poincar\'e lemma:
\begin{equation}\label{eq:poincare}
\frac{{\partial}}{{\partial} \mathbf{x}_2}\left(\psi(\mathbf{x},t)\frac{{\partial}
\alpha_i(\mathbf{x},t)}{{\partial}
\mathbf{x}_2}
\right)=\left(\frac{{\partial}}{{\partial}
\mathbf{x}_2}\left(\psi(\mathbf{x},t)\frac{{\partial} \alpha_i(\mathbf{x},t)}{{\partial}
\mathbf{x}_2}
\right)
\right)^T
\end{equation}
This relation, in the form of conditions of existence of the solutions for function $\Psi(\mathbf{x},t)$ in
(\ref{eq:assume_explicit}), takes into account structural properties of system (\ref{system1}), (\ref{error_model_d}).
Indeed,
consider partial derivatives ${\partial} \alpha_i(\mathbf{x},t)/{\partial} \mathbf{x}_2$,
${\partial} \psi(\mathbf{x},t)/{\partial} \mathbf{x}_2$ with respect to the vector
$\mathbf{x}_2=(x_{21},\dots,x_{2p})^T$. Let
\begin{equation}\label{eq:single_dim}
\begin{split}
\frac{{\partial} \psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}=\left(\begin{array}{cccccccc}
0& 0
& \cdots & 0& \ast & 0&\cdots&0
\end{array}\right), \
\frac{{\partial}
\alpha_i(\mathbf{x},t)}{{\partial}\mathbf{x}_2}=\left(\begin{array}{cccccccc}
0 & 0
& \cdots & 0&
\ast &
0&\cdots&0
\end{array}\right)
\end{split}
\end{equation}
where the symbol $\ast$ denotes a function of $\mathbf{x}$ and $t$. Then condition (\ref{eq:single_dim}) guarantees that equality
(\ref{eq:poincare}) (and, subsequently, Assumption
\ref{assume:explicit_realizability}) holds. In case ${\partial}
\alpha(\mathbf{x}_1\oplus \mathbf{x}_2,t)/{\partial} \mathbf{x}_2=0$, Assumption
\ref{assume:explicit_realizability} holds for arbitrary
$\psi(\mathbf{x},t)\in \mathcal{C}^1$. If $\psi(\mathbf{x},t)$,
$\boldsymbol{\alpha}(\mathbf{x},t)$ depend on a single component of $\mathbf{x}_2$, for instance $x_{2k}, \ k\in\{1,\dots,p\}$, then conditions
(\ref{eq:single_dim}) hold and the function $\Psi(\mathbf{x},t)$ can be derived explicitly by integration
\begin{equation}\label{eq:single_dim_int}
\Psi(\mathbf{x},t)=\int\psi(\mathbf{x},t)\frac{{\partial}\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} x_{2k}}d x_{2k}
\end{equation}
In all other cases, existence of the required function
$\Psi(\mathbf{x},t)$ follows from (\ref{eq:poincare}).

In the general case, when $\dim\{\mathbf{x}_2\}>1$, the problem of finding a function $\Psi(\mathbf{x},t)$ satisfying condition
(\ref{eq:assume_explicit}) can be avoided (or converted into one with an already known solution, such as (\ref{eq:poincare}),
(\ref{eq:single_dim_int})) by the {\it embedding} technique proposed in \cite{ECC_2003}. The main idea of the method is to introduce an auxiliary system that is forward-complete with respect to input $\mathbf{x}(t)$
\begin{equation}\label{eq:embed}
\begin{split}
\dot{{\boldsymbol{\xi}}}&=\mathbf{f}_{\boldsymbol{\xi}}(\mathbf{x},{\boldsymbol{\xi}},t), \ {\boldsymbol{\xi}}\in\mathbb{R}^z \\
\mathbf{h}_\xi&=\mathbf{h}_\xi({\boldsymbol{\xi}},t), \
\mathbb{R}^z\times\mathbb{R}_+\rightarrow\mathbb{R}^h
\end{split}
\end{equation}
such that
\begin{equation}\label{eq:embed_L2}
\|f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}_1(t)\oplus\mathbf{h}_\xi(t)\oplus\mathbf{x}_2'(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,T]}
\leq C_\xi\in\mathbb{R}_+
\end{equation}
for all $T\geq t_0$, and $\dim\{{\mathbf{h}_\xi}\}+\dim{\{\mathbf{x}_2'\}}=p$.
Then (\ref{error_model_d}) can be rewritten as follows:
\begin{equation}\label{error_model_d1}
{\dot\psi}=f(\mathbf{x}_1\oplus\mathbf{h}_\xi\oplus\mathbf{x}_2',{\boldsymbol{\theta}},t)-f(\mathbf{x}_1\oplus\mathbf{h}_\xi\oplus\mathbf{x}_2',\hat{\boldsymbol{\theta}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)+\varepsilon_\xi(t),
\end{equation}
where $\varepsilon_\xi(t)\in L_{2}^1 [t_0,\infty]$, and
$\dim\{\mathbf{x}_2'\}=p-h<p$. In principle, the dimension of $\mathbf{x}_2'$
could be reduced to $1$ or $0$. As soon as this is ensured,
Assumption \ref{assume:explicit_realizability} will be satisfied and the results of Theorem \ref{stability_theorem} follow.
Sufficient conditions ensuring the existence of such an embedding in the general case are provided in \cite{ECC_2003}. For systems in which the parametric uncertainty can be reduced to vector fields with lower-triangular structure, the embedding is given in
\cite{ALCOSP_2004}.

\section{Main Results}\label{sec:main}

Without loss of generality let us rewrite interconnection
(\ref{eq:system:s1}), (\ref{eq:system:s2}) as follows:
\begin{equation}\label{eq:system:s11}
\begin{split}
\dot{\mathbf{x}}_1&=\mathbf{f}_1(\mathbf{x})+\mathbf{g}_1(\mathbf{x})u_x\\
\dot{\mathbf{x}}_2 &=\mathbf{f}_2(\mathbf{x},{\boldsymbol{\theta}}_x)+\gamma_y(\mathbf{y},t)+
\mathbf{g}_2(\mathbf{x})u_x
\end{split}
\end{equation}

\begin{equation}\label{eq:system:s21}
\begin{split}
\dot{\mathbf{y}}_1&=\mathbf{q}_1(\mathbf{y})+\mathbf{z}_1(\mathbf{y})u_y\\
\dot{\mathbf{y}}_2&=\mathbf{q}_2(\mathbf{y},{\boldsymbol{\theta}}_y)+\gamma_x(\mathbf{x},t)+\mathbf{z}_2(\mathbf{y})u_y
\end{split}
\end{equation}

Let us now consider the following control functions
\begin{equation}\label{control_s1}
\begin{split}
u_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,{\boldsymbol{\omega}}_x,t)&=(L_{\mathbf{g}(\mathbf{x})}\psi_x(\mathbf{x},t))^{-1}\left(-L_{\mathbf{f}(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x)}\psi_x(\mathbf{x},t)-\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)\right.\\
& \left.-\frac{{\partial}\psi_x(\mathbf{x},t)}{{\partial} t}\right), \ \ \varphi_x: \
\mathbb{R}\times\mathbb{R}^w\times\mathbb{R}_+\rightarrow\mathbb{R}
\end{split}
\end{equation}
\begin{equation}\label{control_s2}
\begin{split}
u_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,{\boldsymbol{\omega}}_y,t)&=(L_{\mathbf{z}(\mathbf{y})}\psi_y(\mathbf{y},t))^{-1}\left(-L_{\mathbf{q}(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y)}\psi_y(\mathbf{y},t)-\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)\right.\\
&\left.-\frac{{\partial}\psi_y(\mathbf{y},t)}{{\partial} t}\right), \ \ \varphi_y: \
\mathbb{R}\times\mathbb{R}^w\times\mathbb{R}_+\rightarrow\mathbb{R}
\end{split}
\end{equation}
These functions transform the original equations
(\ref{eq:system:s11}), (\ref{eq:system:s21}) into the following form
\begin{equation}\label{eq:error_coupled}
\begin{split}
{\dot\psi}_x&=-\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)+f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)-f_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,t)+h_y(\mathbf{x},\mathbf{y},t)\\
{\dot\psi}_y&=-\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)+f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)-f_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,t)+h_x(\mathbf{x},\mathbf{y},t),
\end{split}
\end{equation}
where
\[
h_x(\mathbf{x},\mathbf{y},t)=L_{\gamma_y(\mathbf{y},t)}\psi_x(\mathbf{x},t), \
h_y(\mathbf{x},\mathbf{y},t)=L_{\gamma_x(\mathbf{x},t)}\psi_y(\mathbf{y},t)
\]
\[
f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)=L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}}_x)}\psi_x(\mathbf{x},t), \
f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)=L_{\mathbf{q}(\mathbf{y},{\boldsymbol{\theta}}_y)}\psi_y(\mathbf{y},t)
\]

Consider the following adaptation algorithms
\begin{equation}\label{fin_forms_ours_tr1x}
\begin{split}
\hat{{\boldsymbol{\theta}}}_x(\mathbf{x},t)&=\Gamma_x(\hat{{\boldsymbol{\theta}}}_{P,x}(\mathbf{x},t)+\hat{{\boldsymbol{\theta}}}_{I,x}(t));
\ \Gamma_x\in\mathbb{R}^{d\times d}, \ \Gamma_x>0
\\ \hat{{\boldsymbol{\theta}}}_{P,x}(\mathbf{x},t)&=
\psi_x(\mathbf{x},t)\boldsymbol{\alpha}_x(\mathbf{x},t)-\Psi_x(\mathbf{x},t) \\
\dot{\hat{{\boldsymbol{\theta}}}}_{I,x}&=\varphi_x(\psi_x(\mathbf{x},t),{\boldsymbol{\omega}}_x,t)\boldsymbol{\alpha}_x(\mathbf{x},t)+\mathcal{R}_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,u_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,t),t),
\end{split}
\end{equation}

\begin{equation}\label{fin_forms_ours_tr1y}
\begin{split}
\hat{{\boldsymbol{\theta}}}_y(\mathbf{y},t)&=\Gamma_y(\hat{{\boldsymbol{\theta}}}_{P,y}(\mathbf{y},t)+\hat{{\boldsymbol{\theta}}}_{I,y}(t));
\ \Gamma_y\in\mathbb{R}^{d\times d}, \ \Gamma_y>0
\\ \hat{{\boldsymbol{\theta}}}_{P,y}(\mathbf{y},t)&=
\psi_y(\mathbf{y},t)\boldsymbol{\alpha}_y(\mathbf{y},t)-\Psi_y(\mathbf{y},t) \\
\dot{\hat{{\boldsymbol{\theta}}}}_{I,y}&=\varphi_y(\psi_y(\mathbf{y},t),{\boldsymbol{\omega}}_y,t)\boldsymbol{\alpha}_y(\mathbf{y},t)+\mathcal{R}_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,u_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,t),t),
\end{split}
\end{equation}
where $\mathcal{R}_x(\cdot)$, $\mathcal{R}_y(\cdot)$ are defined as in (\ref{fin_forms_ours_tr11}), and the functions
$\Psi_x(\cdot)$, $\Psi_y(\cdot)$ will be specified later. Now we are ready to formulate the following result

\begin{theorem}[Properties of the interconnected systems]\label{theorem:interconnection} Let systems (\ref{eq:system:s11}), (\ref{eq:system:s21}) be given. Furthermore, suppose that the following conditions hold:

1) The functions $\psi_x(\mathbf{x},t)$, $\psi_y(\mathbf{y},t)$ satisfy Assumption \ref{assume:psi} for systems (\ref{eq:system:s11}),
(\ref{eq:system:s21}) respectively;

2) The systems
\begin{equation}\label{eq:target_dynamics_connected}
\dot{\psi}_x=-\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)+\zeta_x(t), \ \
\dot{\psi}_y=-\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)+\zeta_y(t)
\end{equation}
satisfy Assumption \ref{assume:gain} with corresponding mappings
\[
\gamma_{x_{\infty,2}}(\psi_{x_0},{\boldsymbol{\omega}}_x,\|\zeta_x(t)\|_{2,[t_0,T]}),
\ \
\gamma_{y_{\infty,2}}(\psi_{y_0},{\boldsymbol{\omega}}_y,\|\zeta_y(t)\|_{2,[t_0,T]}),
\]

3) The systems (\ref{eq:target_dynamics_connected}) have
$L_2^1[t_0,\infty]\mapsto L_2^1[t_0,\infty]$ gains, that is
\begin{equation}\label{eq:L_2_2_gains}
\begin{split}
\|\psi_x(\mathbf{x}(t),t)\|_{2,[t_0,T]}&\leq C_{\gamma_x}+\gamma_{x_{2,2}}(\|\zeta_x(t)\|_{2,[t_0,T]}),\\
\|\psi_y(\mathbf{y}(t),t)\|_{2,[t_0,T]}&\leq C_{\gamma_y}+\gamma_{y_{2,2}}(\|\zeta_y(t)\|_{2,[t_0,T]}),\\
C_{\gamma_x}, \ C_{\gamma_y}\in\mathbb{R}_+,& \ \gamma_{x_{2,2}}, \
\gamma_{y_{2,2}}\in\mathcal{K}_\infty
\end{split}
\end{equation}

4) The functions $f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)$,
$f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)$ satisfy Assumptions \ref{assume:alpha},
\ref{assume:alpha_upper} with corresponding constants $D_x$,
$D_{x,1}$, $D_y$, $D_{y,1}$ and functions $\boldsymbol{\alpha}_x(\mathbf{x},t)$,
$\boldsymbol{\alpha}_y(\mathbf{y},t)$;

5) The functions $h_x(\mathbf{x},\mathbf{y},t)$, $h_y(\mathbf{x},\mathbf{y},t)$ satisfy the following inequalities:
\begin{equation}\label{eq:disturbance_gain}
\|h_x(\mathbf{x},\mathbf{y},t)\|\leq \beta_x \|\psi_x(\mathbf{x},t)\|, \
\|h_y(\mathbf{x},\mathbf{y},t)\|\leq \beta_y \|\psi_y(\mathbf{y},t)\|, \ \beta_x,
\beta_y\in \mathbb{R}_+
\end{equation}

Finally, let the functions $\Psi_x(\mathbf{x},t)$, $\Psi_y(\mathbf{y},t)$ in
(\ref{fin_forms_ours_tr1x}), (\ref{fin_forms_ours_tr1y}) satisfy Assumption \ref{assume:explicit_realizability}
for systems (\ref{eq:system:s11}), (\ref{eq:system:s21})
respectively, and there exist functions $\rho_1(\cdot), \
\rho_2(\cdot), \ \rho_3(\cdot)>Id(\cdot)\in\mathcal{K}_\infty$ and a constant $\bar{\Delta}\in\mathbb{R}_+$ such that the following inequality holds:
\begin{equation}\label{eq:small_gain_adapt}
\beta_y\circ\gamma_{y_{2,2}}\circ\rho_1\circ\left(\frac{D_y}{D_{y,1}}+1\right)\circ\rho_3\circ
\beta_x\circ
\gamma_{x_{2,2}}\circ\rho_2\circ\left(\frac{D_x}{D_{x,1}}+1\right)(\Delta)<
\Delta
\end{equation}
for all $\Delta\geq \bar{\Delta}$. Then

C1) The interconnection (\ref{eq:system:s11}),
(\ref{eq:system:s21}) with controls (\ref{control_s1}),
(\ref{control_s2}) is forward-complete and trajectories $\mathbf{x}(t)$,
$\mathbf{y}(t)$ are bounded

Furthermore,

C2) if properties H\ref{hyp:locally_bound_uniform_f},
H\ref{hyp:locally_bound_uniform_phi} hold for
$f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)$, $f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)$,
$h_x(\mathbf{x},\mathbf{y},t)$, $h_y(\mathbf{x},\mathbf{y},t)$, and also functions
$\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)$,
$\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)$, then
\begin{equation}\label{eq:convergence_psi_xy}
\lim_{t\rightarrow\infty}\psi_x(\mathbf{x}(t),t)=0, \
\lim_{t\rightarrow\infty}\psi_y(\mathbf{y}(t),t)=0
\end{equation}

Moreover,

C3) if property H\ref{hyp:locally_bound_uniform_df} holds for
$f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)$, $f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)$, and the functions
\[
\boldsymbol{\alpha}_x(\mathbf{x},t), \ {\partial} \psi_x(\mathbf{x},t)/{\partial} t, \
\boldsymbol{\alpha}_y(\mathbf{y},t), \ {\partial} \psi_y(\mathbf{y},t)/{\partial} t
\]
are locally bounded with respect to $\mathbf{x}$, $\mathbf{y}$ uniformly in $t$,
then
\begin{equation}\label{eq:convergence_f_xy}
\begin{split}
\lim_{t\rightarrow\infty}f_x(\mathbf{x}(t),{\boldsymbol{\theta}}_x,t)-f_x(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}_x(t),t)&=0,
\\
\lim_{t\rightarrow\infty}f_y(\mathbf{y}(t),{\boldsymbol{\theta}}_y,t)-f_y(\mathbf{y}(t),\hat{{\boldsymbol{\theta}}}_y(t),t)&=0
\end{split}
\end{equation}
\end{theorem}

Let us briefly comment on the conditions and assumptions of Theorem \ref{theorem:interconnection}. Conditions 1), 2) specify restrictions on the goal functionals, similar to those of Theorem
\ref{stability_theorem}. Condition 3) is analogous to the requirement for P3) in Theorem \ref{stability_theorem}; condition 5) specifies uncertainties in the coupling functions $h_x(\cdot)$, $h_y(\cdot)$
in terms of their growth rates w.r.t. $\psi_x(\cdot)$,
$\psi_y(\cdot)$. We observe here that this property is needed in order to characterize the $L_2$ norms of functions
$h_x(\mathbf{x}(t),\mathbf{y}(t),t)$, $h_y(\mathbf{x}(t),\mathbf{y}(t),t)$ in terms of the
$L_2$ norms of functions $\psi_x(\mathbf{x}(t),t)$, $\psi_y(\mathbf{y}(t),t)$.
Therefore, it is possible to replace requirement
(\ref{eq:disturbance_gain}) with the following set of conditions:
\begin{equation}\label{eq:disturbance_gain_1}
\begin{split}
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}&\leq \beta_x
\|\psi_x(\mathbf{x}(t),t)\|_{2,[t_0,T]}+C_x, \\
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}&\leq \beta_y
\|\psi_y(\mathbf{y}(t),t)\|_{2,[t_0,T]}+C_y
\end{split}
\end{equation}
The replacement will allow us to extend results of Theorem
\ref{theorem:interconnection} to interconnections of systems where the coupling functions do not depend explicitly on
$\psi_x(\mathbf{x}(t),t)$, $\psi_y(\mathbf{y}(t),t)$. We illustrate this possibility later with an example.

Condition (\ref{eq:small_gain_adapt}) is the small-gain condition with respect to the $L_2^1[t_0,T]$ norms for interconnection
(\ref{eq:system:s11}), (\ref{eq:system:s21}) with control
(\ref{control_s1}), (\ref{control_s2}). In the case that mappings
$\gamma_{x_{2,2}}(\cdot)$, $\gamma_{y_{2,2}}(\cdot)$ in
(\ref{eq:target_dynamics_connected}) are majorized by linear functions
\[
\gamma_{x_{2,2}}(\Delta)\leq g_{x_{2,2}} \Delta, \
\gamma_{y_{2,2}}(\Delta)\leq g_{y_{2,2}} \Delta, \ \Delta\geq 0,
\]
condition (\ref{eq:small_gain_adapt}) reduces to the much simpler
\[
\beta_y \beta_x g_{x_{2,2}} g_{y_{2,2}}
\left(\frac{D_y}{D_{y,1}}+1\right)\left(\frac{D_x}{D_{x,1}}+1\right)<
1
\]
Notice also that the mappings $\gamma_{x_{2,2}}(\cdot)$,
$\gamma_{y_{2,2}}(\cdot)$ are defined by properties of the target dynamics (\ref{eq:target_dynamics_connected}), and, in principle,
these can be made arbitrarily small. This eventually leads to the following conclusion: the smaller the $L_2$-gains of the target dynamics of systems $\mathcal{S}_1$, $\mathcal{S}_2$, the wider the class of nonlinearities (bounds for $\beta_x$, $\beta_y$, domains of $D_x$, $D_{x,1}$, $D_y$, $D_{y,1}$) which admit a solution to Problem \ref{problem:decentralized}.

\paragraph{Example}

Let us illustrate application of Theorem
\ref{theorem:interconnection} to the problem of decentralized control of two coupled oscillators with nonlinear damping.
Consider the following interconnected systems:
\begin{equation}\label{eq:example_dec_model}
\left\{\begin{array}{ll}
\dot{x}_{1}&=x_{2}\\
\dot{x}_{2}&=f_x(x_{1},\theta_x)+k_1 y_{1} + u_x,
\end{array} \right. \ \
\left\{
\begin{array}{ll}
\dot{y}_{1}&=y_{2}\\
\dot{y}_{2}&=f_y(y_{1},\theta_y)+k_2 x_{1}+ u_y,
\end{array}\right.
\end{equation}
where $k_1$, $k_2\in\mathbb{R}$ are uncertain parameters of coupling,
functions $f_x(x_{1},\theta_x)$, $f_y(y_{1},\theta_y)$
stand for the nonlinear damping terms, and
$\theta_{x}$, $\theta_y$ are unknown parameters. For illustrative purposes we assume the following mathematical model for functions
$f_x(\cdot)$, $f_y(\cdot)$ in (\ref{eq:example_dec_model}):
\begin{equation}\label{eq:example_dec_uncertainty}
\begin{split}
f_x(x_{1},\theta_x)&= \theta_x (x_{1}-x_0)+0.5\sin
(\theta_x(x_{1}-x_0)),\\
\ f_y(y_{1},\theta_y)&= \theta_y (y_{1}-y_0)+0.6\sin
(\theta_y(y_{1}-y_0))
\end{split}
\end{equation}
where $x_0$, $y_0$ are known. Let the control goal be to steer states $\mathbf{x}$ and $\mathbf{y}$ to the origin. Consider the following goal functions
\begin{equation}\label{eq:example_psi}
\psi_x(\mathbf{x},t)=x_1+x_2, \ \psi_y(\mathbf{y},t)= y_1+y_2
\end{equation}
Taking into account equations (\ref{eq:example_dec_model}) and
(\ref{eq:example_psi}) we can derive that
\begin{equation}\label{eq:example_relative_dynamics}
\dot{x}_1=-x_1+\psi_x(\mathbf{x}(t),t), \ \dot{y}_1=-y_1+\psi_y(\mathbf{y},t)
\end{equation}
This automatically implies that
\[
\begin{split}
\|x_1(t)\|_{\infty,[t_0,T]}&\leq
\|x_1(t_0)\|+\|\psi_x(\mathbf{x}(t),t)\|_{\infty,[t_0,T]}\\
\|y_1(t)\|_{\infty,[t_0,T]}&\leq
\|y_1(t_0)\|+\|\psi_y(\mathbf{y}(t),t)\|_{\infty,[t_0,T]}
\end{split}
\]
Hence, Assumption \ref{assume:psi} is satisfied for chosen goal functions $\psi_x(\cdot)$ and $\psi_y(\cdot)$. Notice also that equalities (\ref{eq:example_relative_dynamics}) imply that
\begin{equation}\label{eq:example_L2_gains}
\begin{split}
\|x_1(t)\|_{2,[t_0,T]}&\leq 2^{-1/2}\|x_1(t_0)\|+
\|\psi_x(\mathbf{x},t)\|_{2,[t_0,T]}\\
\|y_1(t)\|_{2,[t_0,T]}&\leq 2^{-1/2}\|y_1(t_0)\|+
\|\psi_y(\mathbf{y},t)\|_{2,[t_0,T]}
\end{split}
\end{equation}
Moreover, according to (\ref{eq:example_relative_dynamics})
limiting relations
\begin{equation}\label{eq:example_control_goal_limit}
\begin{split}
&
\lim_{t\rightarrow\infty}\psi_x(\mathbf{x}(t),t)=\lim_{t\rightarrow\infty}x_1(t)+x_2(t)=0,\\
&
\lim_{t\rightarrow\infty}\psi_y(\mathbf{y}(t),t)=\lim_{t\rightarrow\infty}y_1(t)+y_2(t)=0
\end{split}
\end{equation}
guarantee that
\[
\lim_{t\rightarrow\infty} x_1(t)=0, \
\lim_{t\rightarrow\infty}x_2(t)=0, \ \lim_{t\rightarrow\infty}
y_1(t)=0, \ \lim_{t\rightarrow\infty}y_2(t)=0
\]
Hence, property (\ref{eq:example_control_goal_limit}) ensures asymptotic reaching of the control goal.

According to equations (\ref{control_s1}), (\ref{control_s2})
control functions
\begin{equation}\label{eq:example_control}
\begin{split}
u_x&=-\lambda_x\psi_x-x_2-f_x(x_1,\hat{\theta}_x)\\
u_y&=-\lambda_y\psi_y-y_2-f_y(y_1,\hat{\theta}_y), \ \lambda_x, \
\lambda_y>0
\end{split}
\end{equation}
transform system (\ref{eq:example_dec_model}) into the following form
\begin{equation}\label{eq:example_error_model}
\begin{split}
\dot{\psi}_x&=-\lambda_x \psi_x +
f_x(x_1,\theta_x)-f_x(x_1,\hat{\theta}_x)+k_1 y_1\\
\dot{\psi}_y&=-\lambda_y \psi_y +
f_y(y_1,\theta_y)-f_y(y_1,\hat{\theta}_y)+k_2 x_1
\end{split}
\end{equation}
Notice that systems
\[
\dot{\psi}_x=-\lambda_x \psi_x +\xi_x(t), \
\dot{\psi}_y=-\lambda_y \psi_y +\xi_y(t)
\]
satisfy Assumption \ref{assume:gain} with
\[
\gamma_{x_{2,2}}=\frac{1}{\lambda_x}\|\xi_x(t)\|_{2,[t_0,T]},
\
\gamma_{y_{2,2}}=\frac{1}{\lambda_y}\|\xi_y(t)\|_{2,[t_0,T]}
\]
respectively, and functions $f_x(\cdot)$, $f_y(\cdot)$ satisfy Assumptions \ref{assume:alpha}, \ref{assume:alpha_upper} with
\[
\begin{split}
&D_{x}=1.5, \ D_{x,1}=0.5, \ \alpha_x(\mathbf{x},t)= x_1-x_0, \\
&D_{y}=1.6, \ D_{y,1}=0.4, \ \alpha_y(\mathbf{y},t)= y_1-y_0
\end{split}
\]
Hence conditions 1)-4) of Theorem \ref{theorem:interconnection}
are satisfied. Furthermore, according to the remarks regarding condition 5) of the theorem, requirements
(\ref{eq:disturbance_gain}) can be replaced with implicit constraints (\ref{eq:disturbance_gain_1}). These, however,
according to (\ref{eq:example_L2_gains}) also hold with
$\beta_x=k_1$, $\beta_y=k_2$.

Given that $\alpha_x(\mathbf{x},t)=x_1-x_0$, $\alpha_y(\mathbf{y},t)=y_1-y_0$,
Assumption \ref{assume:explicit_realizability} will be satisfied for functions $\alpha_x(\mathbf{x},t)$, $\alpha_y(\mathbf{y},t)$ with
$\Psi_x(\cdot)=0$, $\Psi_y(\cdot)=0$. Therefore, adaptation algorithms (\ref{fin_forms_ours_tr1x}),
(\ref{fin_forms_ours_tr1y}) will have the following form:
\begin{eqnarray}\label{eq:example_adaptation}
\hat{\theta}_x&=& \Gamma_x((x_1+x_2) (x_1-x_0) +
\hat{\theta}_{x,I}),\nonumber \\
\dot{\hat\theta}_{x,I}&=& \lambda_x (x_1+x_2)(x_1-x_0) - (x_1+x_2)x_2\nonumber \\
\hat{\theta}_y&=& \Gamma_y((y_1+y_2) (y_1-y_0) +
\hat{\theta}_{y,I}),\\
\dot{\hat\theta}_{y,I}&=& \lambda_y (y_1+y_2)(y_1-y_0) -
(y_1+y_2)y_2\nonumber
\end{eqnarray}
Hence, according to Theorem \ref{theorem:interconnection}
boundedness of the solutions in the closed loop system
(\ref{eq:example_error_model}), (\ref{eq:example_adaptation}) is ensured upon the following condition
\begin{equation}\label{eq:example_condition_boundedness}
\frac{k_1 k_2}{\lambda_x
\lambda_y}\left(1+\frac{D_x}{D_{x,1}}\right)\left(1+\frac{D_y}{D_{y,1}}\right)<1
\Rightarrow k_1 k_2 < \frac{\lambda_x\lambda_y}{20}
\end{equation}
Moreover, given that properties H\ref{hyp:locally_bound_uniform_f}--
H\ref{hyp:locally_bound_uniform_phi} hold for the chosen functions
$\psi_x(\mathbf{x},t)$, $\psi_y(\mathbf{y},t)$, condition
(\ref{eq:example_condition_boundedness}) guarantees that limiting relations (\ref{eq:convergence_psi_xy}),
(\ref{eq:convergence_f_xy}) hold.

Trajectories of the closed loop system
(\ref{eq:example_dec_model}), (\ref{eq:example_control}),
(\ref{eq:example_adaptation}) with the following values of parameters $\Gamma_x=\Gamma_y=1$, $\lambda_x=\lambda_y=2$,
$x_0=y_0=1$, $\theta_x=\theta_y=1$ and initial conditions
$x_1(0)=-1$, $x_2(0)=0$, $y_1(0)=1$, $y_2(0)=0$,
$\hat{\theta}_{x,I}(0)=-1$, $\hat{\theta}_{y,I}(0)=-2$ are provided in Fig. \ref{fig:decentralized:example}.

\begin{figure}
\begin{center}
\includegraphics[width=300pt]{example_decentralized.eps}
\end{center}
\begin{center}
\caption{Plots of trajectories $x_1(t)$ (panel a), $x_2(t)$ (panel b), $y_1(t)$ (panel c), $y_2(t)$ (panel d) as functions of $t$ in closed loop system (\ref{eq:example_dec_model}),
(\ref{eq:example_control}), (\ref{eq:example_adaptation}). Dotted lines correspond to the case when $k_1=k_2=0.4$, and solid lines stand for solutions obtained with the following values of coupling
$k_1=1$, $k_2=0.1$}\label{fig:decentralized:example}
\end{center}
\end{figure}

\section{Conclusion}

We provided new tools for the design and analysis of adaptive decentralized control schemes. Our method allows the desired dynamics to be Lyapunov unstable and the parametrization of the uncertainties to be nonlinear. The results are based on a formulation of the problem for adaptive control as a problem of regulation in functional spaces (in particular, $L_2^1[t_0,T]$
spaces) rather than simply reaching the control goal in
$\mathbb{R}^n$. This allows us to introduce adaptation algorithms with new properties and apply a small-gain argument to establish applicability of these schemes to the problem of decentralized control.

In order to avoid unnecessary complications, state feedback was assumed in the main-loop controllers which transform original equation into the error coupled model. Extension of the results to output-feedback main loop controllers is a topic for future study.

\section{Proofs of the theorems}

\subsection{Proof of Theorem \ref{stability_theorem}}

Let us first show that property P1) holds. Consider solutions of system (\ref{system1}), (\ref{error_model_d}),
(\ref{fin_forms_ours_tr1}), (\ref{fin_forms_ours_tr11}) passing through the point $\mathbf{x}(t_0)$, $\hat{{\boldsymbol{\theta}}}_I(t_0)$ for
$t\in[t_0,T^\ast]$. Let us calculate the time-derivative of function
$\hat{{\boldsymbol{\theta}}}(\mathbf{x},t)$:
$\dot{\hat{{\boldsymbol{\theta}}}}(\mathbf{x},t)=\Gamma({\dot{\hat{{\boldsymbol{\theta}}}}_{P}}+\dot{\hat{\boldsymbol{\theta}}}_I)=\Gamma({\dot\psi}\boldsymbol{\alpha}(\mathbf{x},t)+\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{\boldsymbol{\theta}}}_I)$.
Notice that
\begin{equation}\label{t2_1}
\begin{split}
&\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{{\boldsymbol{\theta}}}}_I=\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_1}\dot{\mathbf{x}}_1+\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_2}\dot{\mathbf{x}}_2 +\\
& \psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} t}-
\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_1}\dot{\mathbf{x}}_1-\frac{{\partial}
\Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}\dot{\mathbf{x}}_2-\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} t}+\dot{\hat{\boldsymbol{\theta}}}_I
\end{split}
\end{equation}
According to Assumption \ref{assume:explicit_realizability},
$\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}=\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_2}
$. Then taking into account (\ref{t2_1}), we obtain
\begin{equation}\label{t2_2}
\begin{split}
&
\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{{\boldsymbol{\theta}}}}_I=\left(\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_1}-\frac{{\partial} \Psi}{{\partial} \mathbf{x}_1
}\right)\dot{\mathbf{x}}_1\\
&+\psi(\mathbf{x},t)\frac{{\partial} \boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} t}-\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} t}+\dot{\hat{\boldsymbol{\theta}}}_I
\end{split}
\end{equation}
Notice that according to the proposed notation we can rewrite the term $\left(\psi(\mathbf{x},t)\frac{{\partial} \boldsymbol{\alpha}(\mathbf{x},t)}{{\partial}
\mathbf{x}_1}-\frac{{\partial} \Psi}{{\partial} \mathbf{x}_1 }\right)\dot{\mathbf{x}}_1$ in the following form: $\psi(\mathbf{x},t)L_{\mathbf{f}_1}
\boldsymbol{\alpha}(\mathbf{x},t)-L_{\mathbf{f}_1} \Psi(\mathbf{x},t)+
\left(\psi(\mathbf{x},t)L_{\mathbf{g}_1} \boldsymbol{\alpha}(\mathbf{x},t)-L_{\mathbf{g}_1}
\Psi(\mathbf{x},t)\right)u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)$. Hence, it follows from (\ref{fin_forms_ours_tr1}) and (\ref{t2_2}) that
$\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{{\boldsymbol{\theta}}}}_I=\varphi(\psi)\boldsymbol{\alpha}(\mathbf{x},t)
$. Therefore, the derivative $\dot{\hat{\boldsymbol{\theta}}}(\mathbf{x},t)$ can be written in the following way:
\begin{equation}\label{algorithm_dpsi}
\dot{\hat{{\boldsymbol{\theta}}}}=\Gamma({\dot\psi}+\varphi(\psi))\boldsymbol{\alpha}(\mathbf{x},t)
\end{equation}
Asymptotic properties of nonlinear parameterized control systems with adaptation algorithm (\ref{algorithm_dpsi}) under assumption of Lyapunov stability of the target dynamics were investigated in
\cite{tpt2003_tac}. In the present contribution we aim to provide characterizations of the closed loop system in terms of functional mappings between functions $\psi(\mathbf{x}(t),t)$, $\varepsilon(t)$,
and $f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$ and without requiring Lyapunov stability of the target dynamics
(\ref{eq:target_dynamics}).

For this purpose consider the following positive-definite function:
\begin{equation}\label{V_theta}
V_{\hat{{\boldsymbol{\theta}}}}(\hat{{\boldsymbol{\theta}}},{\boldsymbol{\theta}},t)=
\frac{1}{2}\|\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}}\|^2_{\Gamma^{-1}} +
\frac{D}{4 D_1^2} \int_{t}^\infty\varepsilon^2(\tau)d\tau
\end{equation}
Its time-derivative according to equations (\ref{algorithm_dpsi})
can be obtained as follows:
\begin{equation}\label{eq:dV_full_alg}
\dot{V}_{\hat{{\boldsymbol{\theta}}}}(\hat{{\boldsymbol{\theta}}},{\boldsymbol{\theta}},t)=(\varphi(\psi)+{\dot\psi})(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})^{T}\boldsymbol{\alpha}(\mathbf{x},t)
-
\frac{D}{4 D_1^2}\varepsilon^2(t)
\end{equation}
Hence using Assumptions \ref{assume:alpha},
\ref{assume:alpha_upper} and equality (\ref{error_model_d}) we can estimate the derivative $\dot{V}_{\hat{{\boldsymbol{\theta}}}}$ as follows:
\begin{eqnarray}\label{parameric_deviation_derivative}
& &
\dot{V}_{\hat{{\boldsymbol{\theta}}}}(\hat{{\boldsymbol{\theta}}},{\boldsymbol{\theta}},t)\leq-(f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)+\varepsilon(t))(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})^{T}\boldsymbol{\alpha}(\mathbf{x},t)
- \frac{D}{4 D_1^2}\varepsilon^2(t)\nonumber
\\
& &
\leq-\frac{1}{D}(f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t))^2+\frac{1}{D_1}|\varepsilon(t)||f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|\nonumber\\
& & - \frac{D}{4 D_1^2}\varepsilon^2(t) \leq -
\frac{1}{D}\left(|f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\right)^2 \leq 0
\end{eqnarray}
It follows immediately from
(\ref{parameric_deviation_derivative}), (\ref{V_theta}) that
\begin{equation}\label{eq:parametric_norm}
\|\hat{{\boldsymbol{\theta}}}(t)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}\leq
\|\hat{{\boldsymbol{\theta}}}(t_0)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}+\frac{D}{2 D_1^2}\|\varepsilon(t)\|^{2}_{2,[t_0,\infty]}
\end{equation}
In particular, for $t\in[t_0,T^\ast]$ we can derive from
(\ref{V_theta}) that
$\|\hat{{\boldsymbol{\theta}}}(t)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}\leq
\|\hat{{\boldsymbol{\theta}}}(t_0)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}+\frac{D}{2 D_1^2}\|\varepsilon(t)\|^{2}_{2,[t_0,T^\ast]}$. Therefore
$\hat{{\boldsymbol{\theta}}}(t)\in L_\infty^d[t_0,T^\ast]$. Furthermore
$|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\in L_{2}^1 [t_0,T^\ast]$. In particular
\begin{eqnarray}\label{eq:t1_ins1}
&
&\left\||f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\right\|_{2,[t_0,T^\ast]}^2\leq
\nonumber\\
&&\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}+\frac{D^2}{4 D_1^2}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}^2
\end{eqnarray}
Hence $f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\in L_{2}^1 [t_0,T^\ast]$ as a sum of two functions from $L_{2}^1
[t_0,T^\ast]$. In order to estimate the upper bound of the norm
$\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,T^\ast]}$
from (\ref{eq:t1_ins1}) we use the Minkowski inequality:
\begin{eqnarray}
&&\left\||f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\right\|_{2,[t_0,T^\ast]}\leq
\nonumber\\
&&\left(\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}\right)^{0.5}+
\frac{D}{2 D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}\nonumber
\end{eqnarray}
and then apply the triangle inequality to the functions from
$L_{2}^1 [t_0,T^\ast]$:
\begin{eqnarray}\label{eq:t1_ins2}
& &
\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,T^\ast]}\leq\nonumber\\
& &
\left\||f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1}\varepsilon(t)\right\|_{2,[t_0,T^\ast]}+\\
& & \frac{D}{2 D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}\leq
\left(\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}\right)^{0.5}
+ \frac{D}{D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}\nonumber
\end{eqnarray}
Therefore, property P1) is proven.

Let us prove property P2). In order to do this we have to check first if the solutions of the closed loop system are defined for all $t\in\mathbb{R}_+$, i.e. they do not go to infinity in finite time.
We prove this by contradiction. Suppose that there exists a time instant $t_s$ such that $\|\mathbf{x}(t_s)\|=\infty$. It follows from P1), however, that
$f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\in L_{2}^1
[t_0,t_s]$. Furthermore, according to (\ref{eq:t1_ins2}) the norm
$\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,t_s]}$
can be bounded from above by a continuous function of ${\boldsymbol{\theta}},
\ \hat{{\boldsymbol{\theta}}}(t_0)$, $\Gamma$, and
$\|\varepsilon(t)\|_{2,[t_0,\infty]}$. Let us denote this bound by symbol $D_f$. Notice that $D_f$ does not depend on $t_s$. Consider system (\ref{error_model_d}) for $t\in[t_0,t_s]$:
${\dot\psi}=f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)+\varepsilon(t)$.
Given that both
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t),
\varepsilon(t) \in L_{2}^1 [t_0,t_s]$ and taking into account Assumption \ref{assume:gain}, we automatically obtain that
$\psi(\mathbf{x}(t),t)\in L_\infty^{1}[t_0,t_s]$. In particular, using the triangle inequality and the fact that the function
$\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},M\right)$ in Assumption \ref{assume:gain} is non-decreasing in $M$, we can estimate the norm $\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,t_s]}$ as follows:
\begin{equation}\label{eq:bound_psi}
\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,t_s]}\leq
\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},D_f+\|\varepsilon(t)\|^2_{2,[t_0,\infty]}\right)
\end{equation}
According to Assumption \ref{assume:psi} the following inequality holds:
\begin{equation}\label{eq:bound_x}
\|\mathbf{x}(t)\|_{\infty,[t_0,t_s]}\leq\tilde{\gamma}\left(\mathbf{x}_0,{\boldsymbol{\theta}},\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},D_f+\|\varepsilon(t)\|^2_{2,[t_0,\infty]}\right)\right)
\end{equation}
Given that a superposition of locally bounded functions is locally bounded, we conclude that $\|\mathbf{x}(t)\|_{\infty,[t_0,t_s]}$ is bounded. This, however, contradicts the previous claim that
$\|\mathbf{x}(t_s)\|=\infty$. Taking into account inequality
(\ref{eq:parametric_norm}) we can derive that both
$\hat{{\boldsymbol{\theta}}}(\mathbf{x}(t),t)$ and $\hat{{\boldsymbol{\theta}}}_I(t)$ are bounded for every $t\in\mathbb{R}_+$. Moreover, according to
(\ref{eq:bound_psi}), (\ref{eq:bound_x}),
(\ref{eq:parametric_norm}) these bounds are themselves locally bounded functions of initial conditions and parameters. Therefore,
$\mathbf{x}(t)\in L^n_\infty[t_0,\infty]$,
$\hat{{\boldsymbol{\theta}}}(\mathbf{x}(t),t)\in L^d_\infty [t_0,\infty]$.
Inequality (\ref{eq:psi_gain}) follows immediately from
(\ref{eq:t1_ins2}), (\ref{eq:gain_psi_L2}), and the triangle inequality. Property P2) is proven.

Let us show that P3) holds. It is assumed that system
(\ref{eq:target_dynamics}) has $L_{2}^1 [t_0,\infty]\mapsto L_{p}^1 [t_0,\infty]$, $p>1$ gain. In addition, we have just shown that $f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t),
\varepsilon(t) \in L_{2}^1 [t_0,\infty]$. Hence, taking into account equation (\ref{error_model_d}) we conclude that
$\psi(\mathbf{x}(t),t)\in L_{p}^1 [t_0,\infty]$, $p>1$. On the other hand, given that $f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)$,
$\varphi(\psi,{\boldsymbol{\omega}},t)$ are locally bounded with respect to their first two arguments uniformly in $t$, and that $\mathbf{x}(t)\in L_{\infty}^n[t_0,\infty]$,$\psi(\mathbf{x}(t),t)\in L_\infty^1[t_0,\infty]$, $\hat{{\boldsymbol{\theta}}}(t)\in L_\infty^d[t_0,\infty]$, ${\boldsymbol{\theta}}\in\Omega_\theta$, the signal
$\varphi(\psi(\mathbf{x}(t),t),{\boldsymbol{\omega}},t)+f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$
is bounded. Then $\varepsilon(t)\in L_\infty^1[t_0,\infty]$
implies that ${\dot\psi}$ is bounded, and P3) is guaranteed by Barbalat's lemma.

To complete the proof of the theorem (property P4) consider the time-derivative of function $f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)$:
\[
\begin{split}
&\frac{d}{dt}f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)=L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})+\mathbf{g}(\mathbf{x})u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)}f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)+\\
& \frac{{\partial} f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)}{{\partial} \hat{{\boldsymbol{\theta}}}}\Gamma
(\varphi(\psi,{\boldsymbol{\omega}},t)+{\dot\psi})\boldsymbol{\alpha}(\mathbf{x},t)+\frac{{\partial} f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)}{{\partial} t}
\end{split}
\]
Taking into account that the function $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is continuously differentiable in $\mathbf{x}$, ${\boldsymbol{\theta}}$; the derivative
$ {\partial} {f(\mathbf{x},{\boldsymbol{\theta}},t)}/{{\partial} t}$ is locally bounded with respect to $\mathbf{x}$, ${\boldsymbol{\theta}}$ uniformly in $t$; functions
$\boldsymbol{\alpha}(\mathbf{x},t)$, ${\partial} \psi(\mathbf{x},t)/{\partial} t$ are locally bounded with respect to $\mathbf{x}$ uniformly in $t$, then $d/dt
(f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{\boldsymbol{\theta}},t))$ is bounded. Then given that
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)\in L_{2}^1
[t_0,\infty]$ by applying Barbalat's lemma we conclude that
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{\boldsymbol{\theta}}(t),t)\rightarrow 0$
as $t\rightarrow\infty$. { The theorem is proven.}

\subsection{Proof of Theorem \ref{theorem:interconnection}}

Let us denote
\[
\Delta f_x[t_0,T]=
\|f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)-f_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,t)\|_{2,[t_0,T]},
\]
\[
\Delta f_y
[t_0,T]=\|f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)-f_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,t)\|_{2,[t_0,T]}.
\]
As follows from Theorem \ref{stability_theorem} the following inequalities hold
\begin{equation}\label{proof:interconnection:t1}
\Delta f_x[t_0,T]\leq C_x + \frac{D_x}{D_{1,x}}
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}
\end{equation}
\begin{equation}\label{proof:interconnection:t2}
\Delta f_y[t_0,T]\leq C_y + \frac{D_y}{D_{1,y}}
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]},
\end{equation}
where $C_x$, $C_y$ are some constants, independent of $T$. Taking estimates (\ref{proof:interconnection:t1}),
(\ref{proof:interconnection:t2}) into account we obtain the following estimates:
\begin{equation}\label{proof:interconnection:t3}
\begin{split}
&\Delta f_x[t_0,T]+\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq \\
&C_x + \left(\frac{D_x}{D_{1,x}}+1\right)
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}
\end{split}
\end{equation}
\begin{equation}\label{proof:interconnection:t4}
\begin{split}
&\Delta f_y[t_0,T]+\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq
\\
&C_y + \left(\frac{D_y}{D_{1,y}}+1\right)
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]},
\end{split}
\end{equation}
The proof of the theorem would be complete if we show that the
$L_2^1[t_0,T]$ norms of $h_x(\mathbf{x}(t),\mathbf{y}(t),t)$,
$h_y(\mathbf{x}(t),\mathbf{y}(t),t)$ are globally bounded uniformly in $T$.
Let us show that this is indeed the case. Using the widely known generalized triangular inequality \cite{Jiang_1994}
\[
\gamma(a + b)\leq \gamma((\rho+Id)(a))+\gamma((\rho+Id)\circ
\rho^{-1}(b)), \ a,b\in\mathbb{R}_+, \ \gamma,\rho\in\mathcal{K}_\infty,
\]
equations (\ref{proof:interconnection:t3}),
(\ref{proof:interconnection:t4}) and also property
(\ref{eq:disturbance_gain}), we conclude that
\begin{equation}\label{proof:interconnection:t5}
\begin{split}
&\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq\\
&\beta_y\cdot \gamma_{y_{2,2}}\circ\rho_1
\left(\left(\frac{D_y}{D_{1,y}}+1\right)\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\right)+C_{y,1}\\
&\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq\\
& \beta_x\cdot \gamma_{x_{2,2}}\circ\rho_2
\left(\left(\frac{D_x}{D_{1,x}}+1\right)\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\right)+C_{x,1}
\end{split}
\end{equation}
where $\rho_1(\cdot)$, $\rho_2(\cdot)\in\mathcal{K}_\infty$,
$\rho_1(\cdot), \rho_2(\cdot)>Id(\cdot)$. Then, according to
(\ref{proof:interconnection:t5}), the existence of
$\rho_3(\cdot)\in\mathcal{K}_\infty$, $\rho_3(\cdot)\geq Id(\cdot)$, satisfying the inequality
\[
\beta_y\circ\gamma_{y_{2,2}}\circ\rho_1\circ\left(\frac{D_y}{D_{1,y}}+1\right)\circ\rho_3\circ
\beta_x\circ
\gamma_{x_{2,2}}\circ\rho_2\circ\left(\frac{D_x}{D_{1,x}}+1\right)(\Delta)<
\Delta \ \forall \ \Delta\geq \bar{\Delta}
\]
for some $\bar{\Delta}\in\mathbb{R}_+$ ensures that the norms
\[
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}, \
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}
\]
are globally uniformly bounded in $T$. The rest of the proof follows from Theorem \ref{stability_theorem}. { The theorem is proven.}
\title{Grounded Relational Inference: Domain Knowledge Driven Explainable Autonomous Driving}

\begin{abstract}
Explainability is essential for autonomous vehicles and other robotics systems interacting with humans and other objects during operation. Humans need to understand and anticipate the actions taken by the machines for trustful and safe cooperation. In this work, we aim to develop an explainable model that generates explanations consistent with both human domain knowledge and the model's inherent causal relation. In particular, we focus on an essential building block of autonomous driving\textemdash multi-agent interaction modeling. We propose Grounded Relational Inference (GRI). It models an interactive system's underlying dynamics by inferring an interaction graph representing the agents' relations. We ensure a semantically meaningful interaction graph by grounding the relational latent space into semantic interactive behaviors defined with expert domain knowledge. We demonstrate that it can model interactive traffic scenarios under both simulation and real-world settings, and generate semantic graphs explaining the vehicle's behavior by their interactions.

\end{abstract}

\section{Introduction}
\label{sec:introduction}
\IEEEPARstart{D}{eep} learning has been utilized to address various autonomous driving problems \cite{bojarski2016end,chen2017multi,chen2019dob}. However, deep neural networks lack the transparency that helps people understand their underlying mechanism. It is a crucial drawback for safety-critical applications with humans involved (e.g., autonomous vehicles). Humans need to understand and anticipate the actions taken by the machines for trustful and safe cooperation. In response to this problem, the concept of explainable AI (XAI) was introduced. It refers to machine learning techniques that provide details and reasons that make a model's mechanism easy to understand \cite{arrieta2020explainable}. Most of the existing works for deep learning models focus on post-hoc explanations \cite{arrieta2020explainable}. They enhance model explainability by unraveling the underlying mechanisms of a trained model: Vision-based approaches, such as visual attention \cite{kim2017interpretable} and deconvolution \cite{bojarski2018visualbackprop}, illustrate which segments of the input image affect the outputs; Interaction-aware models, such as social LSTM with social attention \cite{alahi2016social,vemula2018social} and graph neural networks (GNN) with graph attention \cite{hoshen2017vain, velivckovic2017graph, sukhbaatar2016learning, kipf2018neural}, identify the agents that are critical to the decision-making procedure.

Although promising, post-hoc explanations could be ambiguous and falsely interpreted by humans. \textcolor{black}{For instance, a visual attention map only illustrates which regions of the input image the output of the model depends on. The semantic meaning behind the causal relation is left for human users to interpret. Kim et al. \cite{kim2018textual} attempted to resolve the ambiguity by aligning textual explanations with visual attention. However, the underlying mechanism of the model is not necessarily consistent with the textual explanations. To truly build trust with humans, we argue that a deep learning model for an autonomous system should be equipped with explanations consistent with both \emph{human domain knowledge} and the model's \emph{inherent causal relation}}.

\textcolor{black}{In this work, we explore how to approach such an explainable model for an essential building block of autonomous driving\textemdash multi-agent interaction modeling. In particular, we focus on the relational inference problem studied in \cite{kipf2018neural}. Kipf et al. propose the Neural Relational Inference (NRI) model, which models an interactive system by explicitly inferring its inherent interactions. Formally, the NRI model aims to solve a reconstruction task. Given the observed trajectories of all the objects, an encoder first infers the interactions between objects represented by a latent interaction graph, whose edges are aligned with discrete latent variables corresponding to a cluster of pairwise interaction behaviors between the objects. Afterward, a decoder that learns the dynamical model conditioned on the inferred interaction graph then reconstructs the trajectories given the initial states. If the decoder can accurately reconstruct the trajectories, it indicates that the latent space effectively models the interactions.}

\textcolor{black}{We find this discrete latent space interesting because the inferred interaction graph could potentially serve as an explanation directly: it explains the reconstructed trajectories as a sequence of interaction behaviors among agents. Moreover, the reconstructed trajectories are governed by the same interaction graph. Therefore, the NRI model seems promising to fulfill our goal to make the explanation consistent with the model's underlying mechanism. However, since the NRI model learns the latent space in an unsupervised manner, it is difficult for humans to interpret the semantic meaning behind those interaction behaviors, which makes the interaction graph ambiguous as an explanation. To address this issue, we propose to ground the latent space in a set of interactive behaviors defined with human domain knowledge.}

\begin{figure*}[t]
\centering
\includegraphics[width=6.4in]{example.pdf}
\caption{A motivating lane-changing scenario where we ask different models to control the red vehicle. All the models generate deceleration commands but have different intermediate outputs. With the aid of visual attention, we generate a heat map indicating the critical pixels of the input image. Graph attention network assigns edge weights $\omega_i$ to specify the importance of surrounding vehicles to the controlled vehicle. However, the attention mechanisms cannot recognize different effects\textemdash the two cars are mutually important but affect each other in distinct ways. The NRI model can distinguish between different interactive behaviors by assigning different values to the latent variables $z_i$ in the interaction graph. Still, the latent space does not have explicit semantic meaning. In contrast, our model ensures a semantic interaction graph, which illustrates the model's understanding of the scenario and explains the action it takes. It determines the interaction graph with a latent space grounded in yielding and cutting-in behaviors. It learns control policies that generate behaviors consistent with their definitions in domain knowledge (e.g., traffic rules) and executes the corresponding policies according to the inferred edge types.}
\label{fig:example}
\end{figure*}

As a running example, consider the scenario depicted in Fig. \ref{fig:example}, where we ask different models to control the red vehicle. Attention mechanisms can indicate the critical pixels or agents, but they cannot recognize different effects\textemdash the two cars are mutually important but affect each other in distinct ways. The NRI model can distinguish between different interactive behaviors. Still, the latent space does not have explicit semantic meaning. In contrast, our model should determine the interaction graph with a latent space grounded in yielding and cutting-in behaviors. It learns control policies that generate behaviors consistent with their definitions in domain knowledge (e.g., traffic rules) and executes the corresponding policies according to the inferred edge types. This semantic interaction graph illustrates the model's understanding of the scenario and explains the action it takes.

\textcolor{black}{If we merely want to make the interaction graph consistent with humans' labeling of the scenes, a straightforward approach is training the encoder directly via supervised learning.} Interaction labels can be either obtained from human experts \cite{sun2018probabilistic} or rule-based labeling functions \cite{lee2019joint}. \textcolor{black}{However, labels for the interaction graph are insufficient to induce the decoder to synthesize the interactive behaviors suggested by the labels, because the model cannot capture the semantic meaning behind those interaction labels.} Instead, we recast relational inference into an inverse reinforcement learning (IRL) problem and introduce structured reward functions to ground the latent space. Concretely, we model the system as a multi-agent Markov decision process (MDP), where the agents share a reward function that depends on the relational latent space. We design structured reward functions based on expert domain knowledge to explicitly define the interactive behaviors corresponding to the latent space. To solve the formulated IRL problem, we propose Grounded Relational Inference (GRI). It has a variational-autoencoder-like (VAE) GNN in NRI \cite{kipf2018neural} as the backbone model. Additionally, we incorporate the structured reward functions into the model as an additional reward decoder. A variational extension of the adversarial inverse reinforcement learning (AIRL) algorithm is derived to train all the modules simultaneously.

\textcolor{black}{Compared to direct supervision via interaction labels, we provide implicit supervision to GRI in terms of the structures of the reward functions. Since each reward function defines a type of interactive behavior, we confine the latent space to a cluster of interactive behaviors. It mainly has two advantages over supervision through labeling: 1) First, since the policy decoder learns to maximize the cumulative reward given the inferred interaction graph, the structured reward functions guide the policy to synthesize the corresponding semantic behaviors, rather than simply mimicking the demonstrated trajectories; 2) Second, the end-to-end training scheme leaves the model to identify the underlying interaction graph of the observed trajectories and learn the characteristics of different behaviors (i.e., parameters of reward functions) from data. It avoids the undesired bias introduced during the labeling procedure. Labels generated by human experts are subjective. Different people may interpret an interacting scenario in different ways. In contrast, there exist systematic and principled ways to investigate what reward functions human behavior is subject to from data \cite{naumann2020analyzing}.}

\textcolor{black}{The remaining content is organized as follows. In Section \ref{sec:related-work}, we give a concise review of existing works that are closely related to ours in terms of methodology or motivation. In Section \ref{sec:background}, we briefly summarize NRI and AIRL to prepare the readers for the core technical content. In Section \ref{sec:formulation}, we introduce how we reformulate relational inference into a multi-agent IRL problem with relational latent space. In Section \ref{sec:method}, we present the GRI model in a general context. In Section \ref{sec:experiments}, we demonstrate how we apply the proposed framework to model some simple traffic scenarios in both simulation and real-world settings. The experimental results show that GRI can model interactive traffic scenarios, and generate semantic interaction graphs that are consistent with both human domain knowledge and the modeled interactive behaviors.}

\section{Related Work}
\label{sec:related-work}
Our model combines graph neural networks and adversarial inverse reinforcement learning for interactive system modeling. This section gives a concise review on these two topics and summarizes the existing works closely related to ours. We also discuss some additional works on explainable driving models as a complement to the discussion in Sec. \ref{sec:introduction}.

{\bf Interaction modeling using GNN.} GNN has been widely applied for interactive system modeling in recent years \cite{sukhbaatar2016learning, van2018relational, battaglia2016interaction}. One category of models we find interesting is those with graph attention mechanism. One seminal work is Graph Attention Network (GAT) \cite{velivckovic2017graph} which performed well on large-scale inductive classification problems. VAIN \cite{hoshen2017vain} applied attention in multi-agent modeling. The attention map unravels the interior interaction structure to some extent which improves the explainability of VAIN. An approach closely related to ours is NRI \cite{kipf2018neural}, which modeled the interaction structure explicitly with discrete relational latent space compared to the continuous graph attention. We explain the difference between NRI and our proposed method in Sec. \ref{sec:introduction} and \ref{sec:method}. A related work in the autonomous driving domain is \cite{lee2019joint}, which also modeled interactive driving behavior with semantically meaningful interactions but in a supervised manner.

\textcolor{black}{Another type of model we want to mention is the spatio-temporal graph (st-graph). St-graph decomposes a complex problem into components and their spatio-temporal interactions, which are represented by nodes and edges of a factor graph. It makes st-graph a ubiquitous representation for interacting systems, e.g., human motion \cite{jain2016structural}, human-robot interaction \cite{liu2021decentralized}, and traffic flow \cite{yu2017spatio}. Jain et al. \cite{jain2016structural} proposed a general method to transform any st-graph to a mixture of RNNs called structural-RNN (S-RNN). When using GRUs, our GNN policy decoder is similar to S-RNN, as they capture the same spatio-temporal dependency. In particular, Liu et al. \cite{liu2021decentralized} combined S-RNN with model-free RL to obtain a structured policy for robot crowd navigation. In terms of the underlying MDP, our GRI model is developed based on a multi-agent MDP, whereas theirs has a single robot as the agent and regards the surrounding humans as parts of the environment. In addition, we adopt a structured reward function for each agent based on the graph, and introduce a relational latent space into the MDP. }

\textcolor{black}{{\bf Adversarial IRL and Imitation Learning.} Now we give a brief review of related works on adversarial IRL. We also include prior works related to generative adversarial imitation learning (GAIL) \cite{ho2016generative}, because GAIL is closely connected to AIRL \cite{finn2016connection}. Both methods have GANs as the backbone models, and learn the discriminator through MaxEntIRL. The difference is that GAIL uses an unstructured discriminator and does not use the generator's density.}

Our work is mainly related to two categories of methods: multi-agent and latent AIRL/GAIL algorithms. Yu et al. \cite{yu2019multi} proposed a multi-agent AIRL framework for Markov games under correlated equilibrium. It is capable of modeling general heterogeneous multi-agent interactions. The PS-GAIL algorithm \cite{bhattacharyya2018multi} considered a multi-agent environment in the driving domain that is similar to ours\textemdash homogeneous agents with shared policy under centralized control\textemdash and extended GAIL \cite{ho2016generative} to model the interactive behaviors. In \cite{bhattacharyya2019simulating}, they augmented the reward in PS-GAIL as a principled way to specify prior knowledge, which shares the same spirit with the structured reward functions in GRI.

Latent AIRL models integrate a VAE into either the discriminator or the generator for different purposes. Wang et al. \cite{wang2017robust} conditioned the discriminator on the embeddings generated by a VAE trained separately using behavior cloning. The VAE encodes trajectories into low-dimensional space, enabling the generator to produce diverse behaviors from limited demonstration. VDB \cite{peng2018variational} constrained information contained in the discriminator's internal representation to balance the training procedure for adversarial learning algorithms. The PEMIRL framework \cite{yu2019meta} achieved meta-IRL by encoding demonstration into a contextual latent space. Though studied in a different context, PEMIRL is conceptually similar to our framework as both its generator and discriminator depend on the inferred context variables.

{\bf Explainable Autonomous Driving.} At the end of this section, we discuss some additional works related to explainable autonomous driving as a complement to those we have mentioned in Sec. \ref{sec:introduction}. They addressed some shortcomings of the discussed approaches, especially those methods based on attention mechanisms. Kim et al. \cite{kim2018textual} trained a textual explanation generator concurrently with a visual-attention-based controller in a supervised manner. It generates sentences explaining the control action as a consequence of certain objects highlighted in the attention map, which can be easily interpreted compared to visual attention. Another issue of attention that has been raised in the literature is causal confusion \cite{de2019causal}. The model does not necessarily assign high attention weights to objects/regions that influence the control actions. In \cite{kim2017interpretable}, a fine-grained decoder was proposed to refine visual attention maps and detect critical regions through causality tests. In \cite{li2020make}, Li et al. adopted a similar idea for object-level reasoning. Causal inference was applied to identify risk objects in driving scenes. One interesting observation was that the detection accuracy was improved with intervention during the training stage, i.e., augmenting the training data by masking out non-causal objects. However, intervention requires explicit prior knowledge on the causal relations to label the causal and non-causal objects in a scene. Similar to intention labels, such labels are generally prohibitive due to the intricate nature of human cognition.

\section{Background}
\label{sec:background}
In this section, we would like to briefly summarize two algorithms that are closely related to our approach, in order to prepare the readers for the core technical content.

\subsection{Neural Relational Inference (NRI)}\label{subsec:nri}
Kipf et al. \cite{kipf2018neural} represent an interacting system with $N$ objects as a complete bi-directed graph $\mathcal{G}_\mathrm{scene} = (\mathcal{V}, \mathcal{E})$ with vertices $\mathcal{V}=\left\{v_i\right\}_{i=1}^{N}$ and edges $\mathcal{E}=\left\{e_{i,j}=(v_i, v_j) \mid i \neq j \right\}$. The edge $e_{i,j}$ refers to the one pointing from the vertex $v_i$ to $v_j$. Each vertex corresponds to an object in the system. The NRI model is formalized as a VAE with a GNN encoder inferring the underlying interactions and a GNN decoder synthesizing the system dynamics given the interactions.

Formally, the model aims to reconstruct a given state trajectory, denoted by $\mathbf{x}=\left(\mathbf{x}^0,\dots, \mathbf{x}^{T-1}\right)$, where $T$ is the number of timesteps and $\mathbf{x}^t=\left\{\mathbf{x}^t_1,\dots,\mathbf{x}^t_N\right\}$. The vector $\mathbf{x}^t_i\in{\mathbb{R}^n}$ denotes the state vector of object $v_i$ at time $t$. Alternatively, the trajectory can be decomposed into $\mathbf{x}=(\mathbf{x}_1, \dots, \mathbf{x}_N)$, where $\mathbf{x}_i=\left\{\mathbf{x}^0_i,\dots,\mathbf{x}^{T-1}_i\right\}$. The encoder operates over $\mathcal{G}_\mathrm{scene}$, with $\mathbf{x}_i$ as the node feature of $v_i$. It infers the posterior distribution of the edge type ${z}_{i,j}$ for all the edges, collected into a single vector $\mathbf{z}$. The decoder operates over an interaction graph $\mathcal{G}_\mathrm{interact}$ and reconstructs $\mathbf{x}$. The graph $\mathcal{G}_\mathrm{interact}$ is constructed by assigning sampled $\mathbf{z}$ to the edges of $\mathcal{G}_\mathrm{scene}$ and assigning the initial state to the nodes of $\mathcal{G}_\mathrm{scene}$. If $\mathcal{G}_\mathrm{interact}$ represents the interactions sufficiently, the decoder should be able to reconstruct the trajectory accurately.

The model is trained by maximizing the evidence lower bound (ELBO):
\begin{equation*}
\mathcal{L}=\mathbb{E}_{q_\phi(\mathbf{z}\vert\mathbf{x})}\left[\log p_\gamma (\mathbf{x}\vert\mathbf{z})\right]-D_{KL} \left[q_\phi(\mathbf{z}\vert\mathbf{x})\vert\vert p (\mathbf{z})\right],
\end{equation*}
where $q_\phi(\mathbf{z}\vert\mathbf{x})$ is the encoder output which can be factorized as:
\begin{equation}
q_\phi(\mathbf{z}\vert\mathbf{x})=\prod_{i=1}^N\prod_{j=1, j\neq i}^N q_\phi(z_{i,j}\vert \mathbf{x}), \label{eqn:facto}
\end{equation}
where $\phi$ refers to the parameters of the encoder. The decoder output $p_\gamma(\mathbf{x}\vert\mathbf{z})$ can be written as:
\begin{equation*}
p_\gamma(\mathbf{x}\vert\mathbf{z})=\prod_{t=0}^{T-2}p_\gamma(\mathbf{x}^{t+1}\vert{\mathbf{x}^t, \dots, \mathbf{x}^0, \mathbf{z}}),
\end{equation*}
where $\gamma$ refers to the parameters of the decoder.

\subsection{Adversarial Inverse Reinforcement Learning (AIRL)}\label{subsec:airl}
The AIRL algorithm follows the principle of maximum entropy IRL \cite{ziebart2008maximum}. Consider an MDP defined by $(\mathcal{X, A, T}, r)$, where $\mathcal{X, A}$ are the state space and action space respectively. In the rest of the paper, we use $\mathbf{x}$ and $\mathbf{a}$ with any superscript or subscript to represent a state and action in $\mathcal{X}$ and $\mathcal{A}$. $\mathcal{T}$ is the transition operator given by $\mathbf{x}_{t+1}=f(\mathbf{a}_t, \mathbf{x}_t)$\footnote{The transition is assumed deterministic to simplify the notation. A more general form of the algorithm can be derived for stochastic systems, which is essentially the same as the deterministic case.}, and $r:\mathcal{X} \times \mathcal{A}\rightarrow \mathbb{R}$ is the reward function. The maximum entropy IRL framework assumes a suboptimal expert policy $\pi^\mathrm{E}(\mathbf{a}\vert\mathbf{x})$. The demonstration trajectories generated with the expert policy, $\mathcal{D}^\mathrm{E}=\left\{\boldsymbol{\tau}^\mathrm{E}_1, \dots, \boldsymbol{\tau}^\mathrm{E}_M\right\}$ where $\boldsymbol{\tau}^\mathrm{E}_{i}=\left(\mathbf{x}_i^{\mathrm{E}, 0},\mathbf{a}_i^{\mathrm{E}, 0}, \dots, \mathbf{x}_i^{\mathrm{E}, T-1}, \mathbf{a}_i^{\mathrm{E}, T-1}\right)$, have probabilities increasing exponentially with the cumulative reward. Concretely, they follow a Boltzmann distribution:
\begin{equation*}
\boldsymbol{\tau}^\mathrm{E}_i\sim{\pi^\mathrm{E}(\boldsymbol{\tau})} = \frac{1}{Z}\exp\left(\sum_{t=0}^{T-1} r_\lambda(\mathbf{x}_t, \mathbf{a}_t)\right),
\end{equation*}
where $r_\lambda$ is the reward function with parameters denoted by $\lambda$. Maximum entropy IRL aims to infer the underlying reward function parameters of the expert policy. It is formalized as a maximum likelihood problem:
\begin{equation*}
\lambda^* = \mathrm{arg} \max_\lambda \mathbb{E}_{\boldsymbol{\tau}^\mathrm{E}\sim\pi^\mathrm{E}(\boldsymbol{\tau})}\left[\sum_{t=0}^{T-1} r_\lambda(\mathbf{x}^\mathrm{E}_t, \mathbf{a}^\mathrm{E}_t)\right] - \log Z.
\end{equation*}

To derive a feasible algorithm to solve the problem, we need to estimate the partition function $Z$. One practical solution is co-training a policy model with the current estimated reward function through reinforcement learning \cite{finn2016guided}. Finn et al. \cite{finn2016connection} found the equivalency between it and a special form of the generative adversarial network (GAN). The policy model is the generator, whereas a structured discriminator is defined with the reward function to distinguish a generated trajectory $\boldsymbol{\tau}^\mathrm{G}$ from a demonstrated one $\boldsymbol{\tau}^\mathrm{E}$. Fu et al. \cite{fu2017learning} proposed the AIRL algorithm based on it, using a discriminator that identifies generated samples based on the pairs of state and action instead of the entire trajectory to reduce variance:
\begin{equation}
\mathcal{D}_{\lambda,\eta}(\mathbf{x},\mathbf{a})=\frac{\exp\left\{r_\lambda(\mathbf{x},\mathbf{a})\right\}}{\exp\left\{r_\lambda(\mathbf{x},\mathbf{a})\right\}+\pi_\eta(\mathbf{a}\vert\mathbf{x})}, \label{eqn:dis}
\end{equation}
where $\pi_\eta(\mathbf{a}|\mathbf{x})$ is the policy model with parameters denoted by $\eta$. The models $\mathcal{D}_{\lambda,\eta}$ and $\pi_\eta$ are trained adversarially by solving the following min-max optimization problem:
\begin{equation}
\begin{split}
\min_\eta \max_{\lambda} \quad & \mathbb{E}_{\mathbf{x}^\mathrm{E}, \mathbf{a}^\mathrm{E}\sim\pi^\mathrm{E}(\mathbf{x,a})}\left[\log\left(\mathcal{D}_{\lambda,\eta}(\mathbf{x}^\mathrm{E},\mathbf{a}^\mathrm{E})\right)\right] \\
+ & \mathbb{E}_{\mathbf{x}^\mathrm{G}, \mathbf{a}^\mathrm{G}\sim\pi_\eta(\mathbf{x,a})}\left[\log\left(1-\mathcal{D}_{\lambda,\eta}(\mathbf{x}^\mathrm{G},\mathbf{a}^\mathrm{G})\right)\right], \label{eqn:opt}
\end{split}
\end{equation}
where $\pi^\mathrm{E}(\mathbf{x,a})$ denotes the distribution of state and action induced by the expert policy, and $\pi_\eta(\mathbf{x,a})$ is the distribution induced by the learned policy.

\section{Problem Formulation}
\label{sec:formulation}
Our GRI model grounds the relational latent space in a clustering of semantically meaningful interactions by reformulating the relational inference problem into a multi-agent IRL problem. Since the framework has the potential to be generalized to interactive systems in other domains apart from autonomous driving, we will introduce our approach in a general tone. However, it should be noted that we limit our discussion in this paper to autonomous driving problems, without claiming that it can be directly applied to other domains. GRI relies on expert domain knowledge to identify all possible semantic behaviors and design the corresponding reward functions. There exists a broad range of literature on interactive driving behavior modeling \cite{sun2018probabilistic, kesting2010enhanced}, which we can refer to when designing the rewards. We can extend the proposed framework to other fields if proper domain knowledge is available, which is left for future investigation.

We start with modeling the interactive system as a multi-agent MDP with graph representation. As in NRI, the system has an underlying interaction graph $\mathcal{G}_\mathrm{interact}$. The discrete latent variable $z_{i,j}$ takes a value from $\{0, 1, \dots, K-1\}$, where $K$ is the number of interactions. It indicates the type of relation between $v_i$ and $v_j$ with respect to its effect on $v_j$. Additionally, we assume the objects of the system are homogeneous intelligent agents who make decisions based on their interactions with others.

Concretely, each of them is modeled with identical state space $\mathcal{X}$, action space $\mathcal{A}$, transition operator $\mathcal{T}$ and reward function $r:\mathcal{X} \times \mathcal{A}\rightarrow \mathbb{R}$. At time step $t$, the reward of agent $v_j$ depends on the states and actions of itself and the pairwise interactions between itself and all its neighbors:
\begin{equation}
\begin{split}
&r_{\xi, \psi}(v^t_j, \mathbf{z}_j) = r_\xi^{n}(\mathbf{x}^t_j, \mathbf{a}^t_j) \\
& \quad\quad\quad + \sum_{i\in\mathcal{N}_j}\sum_{k=1}^{K-1}\mathbf{1}(z_{i,j}=k) r^{{e},k}_{\psi_k}(\mathbf{x}^t_i, \mathbf{a}^t_i, \mathbf{x}^t_j, \mathbf{a}^t_j), \label{eqn:reward}
\end{split}
\end{equation}
where $\mathbf{z}_j$ is the collection of $\left\{{z}_{i,j}\right\}_{i\in\mathcal{N}_j}$, $r_\xi^{n}$ is the node reward function parameterized by $\xi$, $\mathcal{N}_j$ is the set of $v_j$'s neighbouring nodes, $\mathbf{1}$ is the indicator function, and $r_{\psi_k}^{{e}, k}$ is the edge reward function parameterized by $\psi_k$ for the $k^\mathrm{th}$ type of interaction. We utilize expert domain knowledge to design $r_{\psi_k}^{{e}, k}$, so that the corresponding interactive behavior emerges by maximizing the rewards. Particularly, the edge reward equals to zero for $k=0$, indicating the action taken by $v_j$ does not depend on its interaction with $v_i$.

We assume the agents act cooperatively to maximize the cumulative reward of the system:
\begin{equation*}
\begin{split}
\mathcal{R}_{\xi, \psi}(\boldsymbol{\tau},\mathbf{z})&=\sum_{t=0}^{T-1}\mathbf{r}_{\xi, \psi}\left(\mathbf{x}^t, \mathbf{a}^t, \mathbf{z}\right) \\
&=\sum_{t=0}^{T-1}\sum_{j=1}^{N}r_{\xi, \psi}\left(v^t_j, \mathbf{z}_j\right),
\end{split}
\end{equation*}
with a joint policy denoted by $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$. The cooperative assumption is not necessarily valid for generic traffic scenarios \cite{yu2019multi}, but it simplifies the training procedure significantly. We will leave the extension of the proposed method to non-cooperative interactive traffic scenarios as a future work.

Given a demonstration dataset, we aim to infer the underlying reward function and policy. Different from a typical IRL problem, both $r_{\xi, \psi}$ and $\pi_{\eta}$ depend on $\mathbf{z}$. Therefore, we need to infer the distribution $p(\mathbf{z}\vert\boldsymbol{\tau})$ to solve the IRL problem.

\section{Grounded Relational Inference}\label{sec:method}
We now present the Grounded Relational Inference model to solve the IRL problem specified in Sec. \ref{sec:formulation}. The model consists of three modules modeled by message-passing GNNs \cite{gilmer2017neural}: an encoder inferring the posterior distribution of edge types, a policy decoder generating control actions conditioned on the edge variables sampled from the posterior distribution, and a reward decoder modeling the rewards conditioned on the inferred edge types.

\subsection{Architecture}
The overall model structure is illustrated in Fig. \ref{fig:architect}. Given a demonstration trajectory $\boldsymbol{\tau}^\mathrm{E}\in\mathcal{D}^\mathrm{E}$, the encoder operates over $\mathcal{G}_\mathrm{scene}$ and approximates the posterior distribution $p(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ with $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. The policy decoder operates over a $\mathcal{G}_\mathrm{interact}$ sampled from the inferred $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ and models the policy $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$. Given an initial state, we can generate a trajectory by sequentially sampling $\mathbf{a}^t$ from $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$ and propagating the state. The state is propagated with either the transition operator $\mathcal{T}$ if given, or a simulating environment if $\mathcal{T}$ is not accessible. We denote a generated trajectory given the initial state of $\boldsymbol{\tau}^\mathrm{E}$ as $\boldsymbol{\tau}^{\mathrm{G}}$. Since these two modules are essentially the same as in NRI, we omit the detailed model structures here and include them in Appx. \ref{app:model}.

\begin{figure*}[t]
\centering
\includegraphics[width=6.6in]{GRI-framework.pdf}
\caption{Architecture of grounded relational inference model. Given a demonstration trajectory $\boldsymbol{\tau}^\mathrm{E}\in\mathcal{D}^\mathrm{E}$, the encoder operates over $\mathcal{G}_\mathrm{scene}$ and approximates the distribution $p(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ with $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. The policy decoder operates over a $\mathcal{G}_\mathrm{interact}$ sampled from the inferred $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ and models the policy $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$. Given the initial state of $\boldsymbol{\tau}^\mathrm{E}$, we sample a trajectory $\boldsymbol{\tau}^\mathrm{G}$ by sequentially sampling $\mathbf{a}^t$ from $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$ and propagating the state. Finally, we use the reward GNN to compute the cumulative rewards of $\boldsymbol{\tau}^\mathrm{G}$ and $\boldsymbol{\tau}^\mathrm{E}$ conditioned on the sampled $\mathcal{G}_\mathrm{interact}$.} \label{fig:architect}
\end{figure*}

The reward decoder computes the reward of a state-action pair given the sampled edge variables. We use it to compute the cumulative rewards of $\boldsymbol{\tau}^\mathrm{G}$ and $\boldsymbol{\tau}^\mathrm{E}$ conditioned on the sampled $\mathcal{G}_\mathrm{interact}$. The reward decoder is in the form of Eqn. (\ref{eqn:reward}). Additionally, we augment the functions $r^n_\xi$ and $r^{e,k}_{\psi_k}$ with MLP shaping terms to mitigate the reward shaping effect \cite{fu2017learning}, resulting in:
\begin{equation}
f^n_{\xi,\omega}(\mathbf{x}^t_j, \mathbf{a}^t_j, \mathbf{x}^{t+1}_j) = r^n_{\xi}(\mathbf{x}^t_j, \mathbf{a}^t_j)+h^n_\omega(\mathbf{x}^{t+1}_j)-h^n_\omega(\mathbf{x}^t_j), \label{eqn:node_reward}
\end{equation}
and
\begin{equation}
\begin{split}
& f^{e,k}_{\psi_k, \chi_k}(\mathbf{x}^t_i, \mathbf{a}^t_i, \mathbf{x}^{t+1}_i, \mathbf{x}^t_j, \mathbf{a}^t_j, \mathbf{x}^{t+1}_j) = r^{{e},k}_{\psi_k}(\mathbf{x}^t_i, \mathbf{a}^t_i, \mathbf{x}^t_j, \mathbf{a}^t_j)\\
&\quad \quad \quad + h^{e,k}_{\chi_k}(\mathbf{x}^{t+1}_i, \mathbf{x}^{t+1}_j)-h^{e,k}_{\chi_k}(\mathbf{x}^{t}_i, \mathbf{x}^{t}_j), \label{eqn:edge_reward}
\end{split}
\end{equation}
where $h^n_{\omega}$ and $h^{e,k}_{\chi_k}$ are MLPs with parameters denoted by $\omega$ and $\chi_k$ respectively. We denote the shaped reward function of agent $v_j$ by $\mathbf{f}_{\xi,\omega,\psi,\chi}\left(\mathbf{x}^t, \mathbf{a}^t,\mathbf{x}^{t+1},\mathbf{z}\right)$, which equals the right-hand side of Eqn. (\ref{eqn:reward}) but with $r^n_\xi$ and $r^{e,k}_{\psi_k}$ substituted by the augmented rewards. The shaped reward function together with the policy model defines the discriminator which distinguishes $\boldsymbol{\tau}^\mathrm{G}$ from $\boldsymbol{\tau}^\mathrm{E}$:
\begin{equation*}
\begin{split}
&\mathcal{D}_{\xi, \omega, \psi, \chi, \eta}(\mathbf{x}^t, \mathbf{a}^t, \mathbf{x}^{t+1}, \mathbf{z}) \\
&\quad\quad\quad =\frac{\exp\left\{\mathbf{f}_{\xi,\omega,\psi,\chi}\left(\mathbf{x}^t, \mathbf{a}^t,\mathbf{x}^{t+1},\mathbf{z}\right)\right\}}{\exp\left\{\mathbf{f}_{\xi,\omega,\psi,\chi}\left(\mathbf{x}^t, \mathbf{a}^t, \mathbf{x}^{t+1}, \mathbf{z}\right)\right\}+\boldsymbol{\pi}_\eta\left(\mathbf{a}^t\vert \mathbf{x}^t, \mathbf{z}\right)}.
\end{split}
\end{equation*}

\subsection{Training}
We aim to train the three modules simultaneously. Consequently, we incorporate the encoder model $q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)$ into the objective function of AIRL, resulting in the optimization problem (\ref{eqn:opt-2}). The encoder is integrated into the minimization problem because the reward function has a direct dependence on the latent space. The model is then trained by solving problem (\ref{eqn:opt-2}) in an adversarial scheme: we alternate between training the encoder and reward for the minimization problem and training the policy for the maximization problem. Specifically, the objective for the encoder and reward is the following minimization problem given fixed $\eta$:
\begin{equation}
\begin{aligned}
\min_{\xi, \omega, \psi, \chi, \phi}\quad & \mathcal{J}(\xi, \omega, \psi, \chi, \phi, \eta) \\
\textrm{s.t.}\quad & \mathbb{E}\left\{D_{KL}\left[q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)\Vert p(\mathbf{z})\right]\right\}\leqslant I_c. \label{eqn:min}
\end{aligned}
\end{equation}
The objective for the policy is maximizing $\mathcal{J}(\xi, \omega, \psi, \chi, \phi, \eta)$ with fixed $\xi, \omega, \psi, \chi$ and $\phi$.

\begin{figure*}[t]
\begin{equation}
\begin{split}
\max_\eta \min_{\xi, \omega, \psi, \chi, \phi}\quad & \mathcal{J}(\xi, \omega, \psi, \chi, \phi, \eta)=\mathbb{E}_{\boldsymbol{\tau}^\mathrm{E}\sim\boldsymbol{\pi}^\mathrm{E}(\boldsymbol{\tau})}\Bigg\{\mathbb{E}_{\mathbf{z}\sim{q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)}}\bigg[-\sum_{t=0}^{T-1}\log \mathcal{D}_{\xi,\omega,\psi,\chi,\eta}(\mathbf{x}^{\mathrm{E}, t}, \mathbf{a}^{\mathrm{E},t}, \mathbf{x}^{\mathrm{E}, t+1},\mathbf{z}) \\
&\qquad\qquad\qquad\quad\ -\mathbb{E}_{\boldsymbol{\tau}^\mathrm{G}\sim\boldsymbol{\pi}_\eta(\boldsymbol{\tau}\vert \mathbf{z})}\sum_{t=0}^{T-1}\log \left(1-\mathcal{D}_{\xi, \omega, \psi, \chi, \eta}(\mathbf{x}^{\mathrm{G}, t}, \mathbf{a}^{\mathrm{G},t}, \mathbf{x}^{\mathrm{G}, t+1}, \mathbf{z})\right)\bigg]\Bigg\}, \\
\textrm{s.t.}\quad & \mathbb{E}_{\boldsymbol{\tau}^\mathrm{E}\sim\boldsymbol{\pi}^\mathrm{E}(\boldsymbol{\tau})}\left\{D_{KL}\left[q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)\Vert p(\mathbf{z})\right]\right\}\leqslant I_c. \label{eqn:opt-2}
\end{split}
\end{equation}
\hrulefill
\end{figure*}

The objective function in the problem (\ref{eqn:opt-2}) is essentially the expectation of the objective function in the problem (\ref{eqn:opt}) over the inferred posterior distribution $q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)$ and the demonstration distribution $\boldsymbol{\pi}^\mathrm{E}\left(\boldsymbol{\tau}\right)$. The constraint enforces an upper bound $I_c$ on the KL-divergence between $q_\phi\left(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E}\right)$ and the prior distribution $p(\mathbf{z})$. A sparse prior is chosen to encourage sparsity in $\mathcal{G}_\mathrm{interact}$. It has a similar regularization effect to the $D_{KL}$ term in ELBO. We borrow its format from variational discriminator bottleneck (VDB) \cite{peng2018variational}. VDB improves adversarial training by constraining the information flow from the input to the discriminator. The KL-divergence constraint is derived as a variational approximation to the information bottleneck \cite{alemi2016deep}. Although VDB has a different motivation, we adopt it for two reasons. First, the proposed model is not generative because our goal is not synthesizing trajectories from the prior $p(\mathbf{z})$, but inferring the posterior $p\left(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E}\right)$. Therefore, regularization derived from information bottleneck is more sensible compared to ELBO. Second, the constrained problem (\ref{eqn:min}) can be relaxed by introducing a Lagrange multiplier $\beta$. During training, $\beta$ is updated through dual gradient descent as follows:
\begin{equation}
\beta \leftarrow \max\left(0, \beta + \alpha_\beta\left(\mathbb{E}\left\{D_{KL}\left[q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)\Vert p(\mathbf{z})\right]\right\}- I_c \right) \right). \label{eqn:adapt}
\end{equation}
We find the adaptation scheme particularly advantageous. The model can focus on inferring $\mathbf{z}$ for reward learning after satisfying the sparsity constraint, because the magnitude of $\beta$ decreases towards zero once the constraint is satisfied. However, it is worth noting that our framework does not rely on the bottleneck constraint to induce a semantically meaningful latent space as in \cite{higgins2016beta}. In contrast, GRI relies on the structured reward functions to ground the latent space into semantic interactive behaviors. The bottleneck serves as a regularization to find out the minimal interaction graph to represent the interactions. In fact, we trained the baseline NRI models with the same constraints and weight update scheme. The experimental results show that the constraint itself is not sufficient to induce a sparse interaction graph.

In general, when the dynamics $\mathcal{T}$ is unknown or non-differentiable, maximum entropy RL algorithms \cite{levine2018reinforcement} are adopted to optimize the policy. In this work, we assume known and differentiable dynamics, which is a reasonable assumption for the investigated scenarios. It allows us to directly backpropagate through the trajectory for gradient estimation, which simplifies the training procedure.

\section{Experiments}
\label{sec:experiments}
We evaluate the proposed GRI model on a synthetic dataset as well as a naturalistic traffic dataset. The synthetic data are generated using policy models trained given the ground-truth reward function and interaction graph. We intend to verify if GRI can induce a semantically meaningful relational latent space and infer the underlying relations precisely. The naturalistic traffic data are extracted from the NGSIM dataset. We aim to validate if GRI can model real-world traffic scenarios effectively with the grounded latent space. Unlike synthetic agents, we do not have the privilege to access the ground-truth graphs governing human drivers' interactions. Instead, we construct hypothetical graphs after analyzing the segmented data. The hypotheses reflect humans' understanding of the traffic scenarios. \textcolor{black}{Moreover, the hypothetical graphs are built upon a set of interactive behavior whose characteristics are described by the designed reward functions. We would like to see if the reward functions can incorporate the semantic information into the latent space, and let GRI model real-world interactive systems in the same way as humans.} In each setting, we consider two traffic scenarios, car-following and lane-changing.

\subsection{Baselines} \label{sec:baseline}
The main question of interest is whether GRI can induce semantically meaningful interaction graphs. To answer the question, the most important baseline model for comparison is NRI, because GRI shares the same prior distribution of latent variables with NRI. Comparing the posterior distributions provides insights on whether the structured reward functions can ground the latent space into semantic interactive behaviors. In each experiment, the baseline NRI model has the same encoder and policy decoder as the GRI model. Besides, as stated in Sec. \ref{sec:method}, the same bottleneck constraint and the weight update scheme in Eqn. (\ref{eqn:adapt}) were applied as regularization for minimal representation.

Another model for comparison is a supervised policy decoder. We assume that the ground-truth graphs or human hypotheses are available. Therefore, we can directly train a policy decoder in a supervised way. The ground-truth graph is fed to the policy decoder as a substitute for the interaction graph sampled from the encoder output $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. The training of the decoder becomes a simple regression problem. We used mean square error as the loss function to train it.

As additional information is granted, it is unfair to directly compare the performance of GRI with the supervised policy model. Since the supervised model is trained with the ground-truth interaction graphs governing the systems, it is expected to achieve smaller reconstruction error. However, the supervised baseline provides some useful insights. In the naturalistic traffic scenarios, the supervised model gives us some insights into whether the human hypotheses are reasonable. If the supervised model can reconstruct the trajectories precisely, it will justify our practice to adopt graph accuracy as one of the evaluation metrics.

\textcolor{black}{More importantly, in Sec. \ref{sec:ood}, we demonstrate that GRI's latent space still maintains its semantic meaning under some perturbations to the initial states, whereas the decoders of baseline models fail to synthesize those behaviors under the same perturbations, including the supervised policy decoder which is trained with the ground-truth interaction graphs. This supports our argument that direct supervision via interaction labels is not sufficient to guide the policy to synthesize behaviors with correct semantic meaning.}

There exist other alternatives for the purpose of trajectory reconstruction. However, it is not our goal in this paper to find an expressive model for accurate reconstruction. Therefore, we do not consider other baselines from this perspective. For the task of grounding the latent space into semantic interactive driving behaviors, we did not find any exact alternatives in the literature. \textcolor{black}{For the specific scenarios studied in this paper, we may design some rule-based approaches to directly infer the interaction graph. However, it is difficult to decide the parameters that best describe the interactive behaviors, because there is a spectrum in how people follow the rules \cite{8814167}. In this paper, we are interested in a data-driven module that can be incorporated into an end-to-end learning model, and has the potential to be generalized to complicated driving scenarios and systems in other domains. Apart from GRI, a potential alternative solution could be adopting a differentiable logic module. For instance, Leung et al. \cite{8814167} proposed a differentiable parametric Signal Temporal Logic formula (pSTL) which could be learnt from data. We will investigate along this direction in our future works.}

\begin{figure*}[t]
\centering
\includegraphics[width=7in]{scene.pdf}
\caption{Test scenarios with the underlying interaction graphs. In the synthetic scenarios, the graphs are the ground-truth ones governing the synthetic experts. In the naturalistic traffic scenarios, the graphs are human hypotheses reflecting humans' understanding of the traffic scenarios.} \label{fig:scene}
\end{figure*}

\subsection{Evaluation Metrics}
To evaluate a trained model, we sample a $\boldsymbol{\tau}^\mathrm{E}$ from the test dataset and extract the maximum posterior probability (MAP) estimate of edge variables, $\hat{\mathbf{z}}$, from $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. Afterward, we obtain a single sample of trajectories $\hat{\boldsymbol{\tau}}$ by executing the mean value of the policy output. The root mean square errors (RMSE) of states and the accuracy of $\mathcal{G}_\mathrm{interact}$ are selected as the evaluation metrics, which are computed based on $\hat{\mathbf{z}}$, $\hat{\boldsymbol{\tau}}$, $\boldsymbol{\tau}^\mathrm{E}$, and the ground truth or hypothetical latent variables denoted by $\mathbf{z}^\mathrm{E}$:
\begin{equation*}
\begin{split}
\mathrm{RMSE}_\epsilon &= \sqrt{\frac{1}{NT}\sum_{j=1}^{N}\sum_{t=0}^{T-1}(\epsilon^{\mathrm{E},t}_j-\hat{\epsilon}^t_j)^2}, \\
\mathrm{Accuracy} &= \frac{\sum_{i=1}^{N} \sum_{j=1, j\neq i}^{N}\mathbf{1}(z^{\mathrm{E}}_{i,j}=\hat{z}_{i,j})}{N(N-1)}.
\end{split}
\end{equation*}
If multiple edge types exist, we test all the possible permutations of edge types and report the one with the highest graph accuracy for NRI.

\textcolor{black}{It is worth noting that the graph accuracy on the naturalistic traffic dataset merely quantifies the divergence between the inferred graphs and the hypotheses we construct. We anticipate that GRI can attain a higher accuracy than NRI. It will imply that we can incorporate human domain knowledge into GRI and induce a semantic relational latent space consistent with the hypotheses built upon the same domain knowledge. However, a low graph accuracy does not necessarily mean that humans cannot interpret the inferred graphs well. The hypothetical graphs represent one perspective to interpret the interactive scenes. It is possible that NRI may find another sensible way to categorize and interpret the interactions, which can also be understood by humans.}

\textcolor{black}{To further study the explainability of the learned latent spaces, we want to look into the inferred graphs and have a qualitative comparison between the latent spaces learned by the two models. For each setting, we compute the distribution of estimated edge variables $\hat{\mathbf{z}}$ over the test dataset. As in \cite{kipf2018neural}, we visualize the results in multiple adjacency matrices corresponding to different edge types. In the adjacency matrix corresponding to the $k^\mathrm{th}$ type of interaction, the element $A_{i,j}$ indicates the relative frequency of $\hat{z}_{j,i}=k$, where $\hat{z}_{j,i}$ is the latent variable for the edge from node $j$ to node $i$. In other words, $A_{i,j}$ equals the ratio of test samples where the model infers $\hat{z}_{j,i}=k$. By inspecting the edge type distributions, we can get some extra insights into the explainability of the two models beyond the quantitative metrics.}

\subsection{Synthetic Scenes}\label{sec:synthetic}

\begin{table*}[t]
\centering
\caption{Performance Comparison on Synthetic Dataset}
\label{table:synthetic}
\begin{threeparttable}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Car Following ($\Delta t=0.2s$, $T=20$)} & \multicolumn{4}{c|}{Lane Changing ($\Delta t=0.2s$, $T=30$)} \\ \cline{2-8}
& $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ & $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_y(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ \\ \hline GRI & $0.241\pm{0.125}$ & $0.174\pm{0.068}$ & $\mathbf{100.00\pm{0.00}}$ & \textcolor{black}{$0.529\pm{0.230}$} & \textcolor{black}{$0.207\pm{0.046}$} & \textcolor{black}{$0.303\pm{0.128}$} & \textcolor{black}{$\mathbf{99.95\pm{0.01}}$} \\ \hline NRI & ${0.047\pm{0.024}}$ & ${0.056\pm{0.015}}$ & $66.70\pm{0.00}$ & \textcolor{black}{${0.109\pm{0.045}}$} & \textcolor{black}{${0.155\pm{0.038}}$} & \textcolor{black}{${0.061\pm{0.016}}$} & \textcolor{black}{$55.9\pm{7.98}$} \\ \hline Supervised & $\mathbf{0.039\pm{0.016}}$ & $\mathbf{0.050\pm{0.009}}$ & - & \textcolor{black}{$\mathbf{0.062\pm{0.027}}$} & \textcolor{black}{$\mathbf{0.145\pm{0.035}}$} & \textcolor{black}{$\mathbf{0.048\pm{0.011}}$} & -\\ \hline
\end{tabular}
\begin{tablenotes}
\item[1] The data is presented in the form of $\text{mean}\pm{\text{std}}$.
\end{tablenotes}
\end{threeparttable}
\end{table*}

As mentioned above, we designed two synthetic scenarios, car-following and lane-changing. The two scenes and their underlying interaction graphs are illustrated in Fig. \ref{fig:scene}. In both scenarios, we have a leading vehicle whose behavior does not depend on the others. Its trajectory is given without the need for reconstruction. We assume it runs at constant velocity. The other vehicles interact with each other and the leader in different ways. In the car-following scene, we model the system with two types of edges: $z_{i,j}=1$ means that Vehicle $j$ follows Vehicle $i$; $z_{i,j}=0$ means that Vehicle $j$ does not interact with Vehicle $i$. In the lane-changing scene, two additional edge types are introduced: $z_{i,j}=2$ means that Vehicle $j$ yields to Vehicle $i$; $z_{i,j}=3$ means that Vehicle $j$ cuts in front of Vehicle $i$.

The MDPs for the tested scenarios are specified as follows. In the car-following scene, since the vehicles mainly interact in longitudinal direction, we only model their longitudinal dynamics to simplify the problem. For all $j\in\{1,2,3\}$, the state vector of Vehicle $j$ consists of three states: $\mathbf{x}^t_{j}=\left[x^t_j\ v^t_j\ a^t_j\right]^\intercal$, where $x^{t}_j$ is the longitudinal coordinate, $v^{t}_j$ is the velocity, and $a^t_j$ is the acceleration. There is only one control input which is the jerk. We denote it as $\delta a^t_j$. The dynamics is governed by a 1D point-mass model:
\begin{align*}
x^{t+1}_j &= x^t_j + v^t_j\Delta t + \frac{1}{2}a^t_j{\Delta t}^2,\\
v^{t+1}_j &= v^t_j + a^t_j\Delta t, \\
a^{t+1}_j &= a^t_j + \delta a^t_j\Delta t,
\end{align*}
where $\Delta t$ is the sampling time. In the lane-changing scene, we consider both longitudinal and lateral motions. The state vector consists of six states instead: $\mathbf{x}^t_{j}=\left[x^t_j\ y^t_j\ v^t_j\ \theta^t_j\ a^t_j\ \omega^t_j\right]^\intercal$. The three additional states are the lateral coordinate $y^{t}_j$, the yaw angle $\theta^t_j$, and the yaw rate $\omega^t_j$. There is one additional action which is the yaw acceleration, denoted by $\delta \omega^t_j$. We model the vehicle as a Dubins' car:
\begin{align*}
x^{t+1}_j &= x^t_j + v^t_j\cos(\theta^t_j)\Delta t, \\
y^{t+1}_j &= y^t_j + v^t_j\sin(\theta^t_j)\Delta t, \\
v^{t+1}_j &= v^t_j + a^t_j\Delta t, \\
\theta^{t+1}_j &= \theta^{t}_j + \omega^t_j\Delta t, \\
a^{t+1}_j &= a^t_j + \delta a^t_j\Delta t, \\
\omega^{t+1}_j &= \omega^{t}_j + \delta \omega^t_j\Delta t.
\end{align*}

The structured reward functions were designed based on expert domain knowledge (e.g. transportation studies \cite{kesting2010enhanced, treiber2000congested}). We mainly referred to \cite{sun2018probabilistic, naumann2020analyzing} in this paper. For the car-following behavior, its reward function is defined as follows:
\begin{equation*}
\begin{split}
r^{e,1}_{\psi_1} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{1,0})\right) g_\mathrm{IDM}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{1,1})\right) g_\mathrm{dist}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{1,2})\right) g_\mathrm{lat}(\mathbf{x}^t_i, \mathbf{x}^t_j),
\end{split}
\end{equation*}
where the features are defined as:
\begin{align}
g_\mathrm{IDM}(\mathbf{x}^t_i, \mathbf{x}^t_j) & = \left(\max{\left(x^t_i-x^t_j, 0\right)}-\Delta x^{\mathrm{IDM},t}_{i,j}\right)^2, \label{eqn:fIDM}\\
g_\mathrm{dist}(\mathbf{x}^t_i, \mathbf{x}^t_j) & = \exp\left(-\frac{\left(\max{\left(x^t_i-x^t_j, 0\right)}\right)^2}{\zeta^2}\right), \label{eqn:fdist}\\
g_\mathrm{lat}(\mathbf{x}^t_i, \mathbf{x}^t_j) & =\left(y^t_j - g_\mathrm{center}(y^t_i)\right)^2.\nonumber
\end{align}
The feature $g_\mathrm{IDM}$ suggests a spatial headway $\Delta x^{\mathrm{IDM},t}_{i,j}$ derived from the intelligent driver model (IDM) \cite{kesting2010enhanced}. The feature $g_\mathrm{dist}$ ensures a minimum collision-free distance. We penalize the following vehicle for surpassing the preceding one with the help of the $\max$ operator in Eqn. (\ref{eqn:fIDM}) and Eqn. (\ref{eqn:fdist}). The last feature $g_\mathrm{lat}$ exists only in lane-changing. It regulates the following vehicle to stay in the same lane as the preceding one with the help of $g_\mathrm{center}$, which determines the lateral coordinate of the corresponding centerline based on the position of the preceding vehicle. \textcolor{black}{Altogether, the features define the following behavior as staying in the same lane as the preceding vehicle while keeping a safe longitudinal headway.}

The reward function for yielding is defined as:
\begin{equation*}
\begin{split}
r^{e,2}_{\psi_2} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{2,0})\right) g_\mathrm{yield}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{2,1})\right) g_\mathrm{dist}(\mathbf{x}^t_i, \mathbf{x}^t_j).
\end{split}
\end{equation*}
The feature $g_\mathrm{dist}$ is defined in Eqn. (\ref{eqn:fdist}). The other feature $g_\mathrm{yield}$ suggests an appropriate spatial headway for yielding:
\begin{align}
g_\mathrm{yield}(\mathbf{x}^t_i, \mathbf{x}^t_j) = & \mathbf{1}\left(g_\mathrm{center}(y^t_j)=g_\mathrm{center}(y^t_i)\right)g_\mathrm{IDM}(\mathbf{x}^t_i, \mathbf{x}^t_j)\nonumber \\
+ & \mathbf{1}\left(g_\mathrm{center}(y^t_j)\neq g_\mathrm{center}(y^t_i)\right)g_\mathrm{goal}(\mathbf{x}^t_i, \mathbf{x}^t_j), \nonumber\\
g_\mathrm{goal}(\mathbf{x}^t_i, \mathbf{x}^t_j) = & \left(\max{\left(x^t_i-x^t_j-\Delta x^{\mathrm{yield}}, 0\right)}\right)^2. \label{eqn:fgoal}
\end{align}
The suggested headway is set to be a constant value, $\Delta x^{\mathrm{yield}}$, when the other vehicle is merging, and switches to $\Delta x^{\mathrm{IDM},t}_{i,j}$ once the merging vehicle enters into the same lane, where its behavior becomes consistent with car following. \textcolor{black}{We follow \cite{sun2018probabilistic} to adopt different reward functions depending on the lanes where the vehicles are located. Merging occurs during a short period of time. Therefore, we assume the driver sets a fixed short-term goal distance as in \cite{sun2018probabilistic} and then transits to following behavior afterwards.}

The reward function for cutting-in is quite similar:
\begin{equation*}
\begin{split}
r^{e,3}_{\psi_3} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{3,0})\right) g_\mathrm{goal}(\mathbf{x}^t_j, \mathbf{x}^t_i) \\
& - \left(1+\exp(\psi_{3,1})\right) g_\mathrm{dist}(\mathbf{x}^t_j, \mathbf{x}^t_i),
\end{split}
\end{equation*}
where the features are defined as in Eqn. (\ref{eqn:fdist}) and Eqn. (\ref{eqn:fgoal}), but with the input arguments switched, because the merging vehicle should stay in front of the yielding one.

Apart from the edge rewards, all the agents share the same node reward function. The following one is adopted for lane-changing:
\begin{equation*}
\begin{split}
r^n_\xi (\mathbf{x}^t_j, \mathbf{a}^t_j)=&-\left(1+\exp(\xi_0)\right)f_v(\mathbf{x}_j^t)\\
&-\left(1+\exp({\xi}_{1:3})\right)^\intercal {f}_\mathrm{state}(\mathbf{x}^t_j)\\
&-\left(1+\exp({\xi}_{4:5})\right)^\intercal {f}_\mathrm{action}(\mathbf{a}^t_j)\\
&-\left(1+\exp(\xi_{6})\right)f_\mathrm{lane}(\mathbf{x}_j^t),
\end{split}
\end{equation*}
where $f_\mathrm{state}$ and $f_\mathrm{action}$ take the element-wise square of $\left[a^t_j\ \theta^t_j\ \omega^t_j \right]$ and $\left[\delta a^t_j\ \delta\omega^t_j \right]$ respectively. \textcolor{black}{It penalizes large control inputs as well as drastic longitudinal and angular motions to induce smooth and comfortable maneuver.} The feature $f_v$ is the squared error between $v^t_j$ and the speed limit $v_\mathrm{lim}$. \textcolor{black}{It regulates the vehicles to obey the speed limit.} The last term $f_\mathrm{lane}$ penalizes the vehicle for staying close to the lane boundaries. For car-following, we simply remove those terms that are irrelevant in 1D motion.
In all the reward functions, the parameters collected in $\psi$ and $\xi$ are unknown during training and inferred by GRI. We take the exponents of them and add one to the results. It enforces the model to use the features when modeling the corresponding interactions.

With the scenarios defined above, we aim to generate one dataset for each scenario. For each scenario, we randomly sampled the initial states of the vehicles and trained an expert policy given the ground-truth reward functions and the interaction graph. Afterwards, we used the trained policy to generate the dataset. The same sampling scheme was used to sample the initial states.

{\bf Results.} On each dataset, we trained a GRI model with the policy decoder (\ref{eqn:policy1})-(\ref{eqn:policy3}) introduced in Appx. \ref{app:model}. The results are summarized in Table \ref{table:synthetic}. The NRI model can reconstruct the trajectories with errors close to the supervised policy. However, it learns a relational latent space that is different from the one governing the demonstration; therefore, the edge variables cannot be interpreted as those semantic interactive behaviors. In contrast, our GRI model interprets the interactions consistently with the domain knowledge inherited in the demonstration, and recovers the interaction graph with high accuracies. It has larger reconstruction errors compared to the baseline approaches. However, it still sufficiently recovers the interactive behaviors, and the reconstructed trajectories are sensible (see Appx. \ref{app:visual}).

We computed the empirical distribution of the estimated edge variables $\hat{z}$ over the test dataset. The results are summarized in Fig. \ref{fig:sim_graph}. The distribution concentrates into a single interaction graph for both models in both scenarios\textemdash as opposed to the case on the naturalistic traffic dataset introduced in the next section\textemdash because the synthetic agents have consistent interaction patterns over all the samples. \textcolor{black}{We observe that NRI learns symmetric relations: In both scenarios, the NRI model assigns the same edge types to the edges $e_{0,1}$ and $e_{1,0}$. It is difficult to interpret their semantic meaning because those pairwise interactions are asymmetric in our synthetic scenes. In contrast, the reward functions in our GRI model enforce an asymmetric relational latent space.}

\begin{figure}[t]
\centering
\includegraphics[width=3.2in]{synthetic_graph.pdf}
\caption{The empirical distribution of estimated edge variables $\hat{z}$ over the test dataset in the synthetic scenarios. We summarize the results in multiple adjacency matrices corresponding to different edge types. In the adjacency matrix corresponding to the $k^\mathrm{th}$ type of interaction, the element $A_{i,j}$ indicates the relative frequency of $\hat{z}_{j,i}=k$, where $\hat{z}_{j,i}$ is the latent variable for the edge from node $j$ to node $i$. } \label{fig:sim_graph}
\end{figure}

\subsection{Naturalistic Traffic Scenes}
To evaluate the proposed method in real-world traffic scenarios, we investigated the same scenarios as in the synthetic case, car-following and lane-changing. We segmented data from the Highway-101 and I-80 datasets of NGSIM. Afterwards, we further screened the data to select those interactive samples and ensure that no erratic swerving or multiple lane changes occur. Unlike the synthetic agents, human agents do not have a ground-truth interaction graph that governs their interactions. Instead, we constructed hypothetical $\mathcal{G}_\mathrm{interact}$ after analyzing the segmented data. The hypotheses for the two scenarios are depicted in Fig. \ref{fig:scene}. The one for car-following is identical to the ground-truth interaction graph we designed for the synthetic agents. However, we proposed a different hypothesis for lane-changing. We excluded the cutting-in relation to reduce the number of edge types and therefore simplify the training procedure. Moreover, we differentiated distinct interactions according to the vehicles' lateral position. We say that a vehicle yields to its preceding vehicle if they drive in neighbouring lanes, whereas it follows the preceding one if they drive in the same lane.

\textcolor{black}{As in the synthetic scenes, the trajectory of the leading vehicle is given without the need for reconstruction. We feed the ground-truth state of the leading vehicle sequentially to the policy decoder when decoding the trajectories of the other vehicles. This practice enables us to heuristically isolate a small interacting group out of the large number of vehicles on the highway. While the leading vehicle's behavior depends on the other vehicles, it is fairly reasonable to assume that the behavior of the modeled following vehicles is independent from other surrounding vehicles on the road after conditioning on the trajectory of the leading vehicle. Even though there may still exist other surrounding vehicles interacting with them, their influence should be subtle. The models should be able to well capture the interactions among the modeled subset while marginalizing out those subtle effects.}

The node dynamics is the same as in the synthetic scene for car-following. For lane-changing, since we did not have accurate heading information, we adopted 2D point-mass model instead. Since the behavior of human drivers is much more complicated than the synthetic agents, we designed reward functions with larger model capacity using neural networks. In car-following, the reward functions are defined as follows:
\begin{align*}
r^{e,1}_{\psi_1} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{1,0})\right) g_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{1,1})\right) g_\mathrm{s}^\mathrm{NN}(\mathbf{x}^t_i,\mathbf{x}^t_j),\\
r^n_\xi \left(\mathbf{x}^t_j, \mathbf{a}^t_j\right) = &-\left(1+\exp(\xi_0)\right)f_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_j) \\
&- \left(1+\exp(\xi_1)\right)f_\mathrm{acc}(\mathbf{x}^t_j)\\
&- \left(1+\exp(\xi_2)\right)f_\mathrm{jerk}(\mathbf{x}^t_j, \mathbf{a}^t_j),
\end{align*}
where the features are defined as:
\begin{align*}
f_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_j) & = \left(v^t_{j}-h_1(\mathbf{x}^t_j)\right)^2,\\
g_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) & = \left(v^t_{j}-h_2(\mathbf{x}^t_i, \mathbf{x}^t_j)\right)^2,\\
g_\mathrm{s}^\mathrm{NN}(\mathbf{x}^t_i,\mathbf{x}^t_j) & = \mathrm{ReLU} {\left(h_3\left(\mathbf{x}^t_i, \mathbf{x}^t_j\right)-x^t_i+x^t_j\right)}^2.
\end{align*}
The features $f_\mathrm{acc}$ and $f_\mathrm{jerk}$ penalize the squared magnitude of acceleration and jerk \textcolor{black}{to induce smooth and comfortable maneuver}. The functions $h_1$, $h_2$ and $h_3$ are neural networks with ReLU output activation. The feature $g^\mathrm{NN}_\mathrm{s}$ is the critical component which shapes the car-following behavior. It learns a non-negative reference headway and penalizes the following vehicle for violating it. The features $g_\mathrm{v}^\mathrm{NN}$ and $f_\mathrm{v}^\mathrm{NN}$ suggest reference velocities considering interaction and merely itself respectively. \textcolor{black}{The edge reward function has large modeling capacity because we let it learn adaptive reference headway and velocity from data. Nevertheless, it still defines the fundamental characteristic of the following behavior, which is always staying behind the preceding vehicle.}

\textcolor{black}{In lane-changing, the node reward function and the edge reward function for the following behavior are similar to those in the car-following scenario. The node reward function has an additional term for the lateral position, which encourages the vehicles to drive on the target lane, i.e., the lane where the leading vehicle is driving. It also has additional terms to penalize the magnitude of lateral velocity and acceleration to induce comfortable maneuver.} To design the yielding reward, we define a collision point of two vehicles based on their states. We approximate the vehicles' trajectories as piecewise-linear between sequential timesteps, and compute the collision point as the intersection between their trajectories (Fig. \ref{fig:poc_viz}). We threshold the point if it exceeds a hard-coded range of interest (e.g. if it is behind the vehicles or greater than a certain distance). Afterwards, we define the distance-to-collision ($d_{poc}$) as the longitudinal distance from the vehicle to the collision point, and the time-to-collision ($T_{col}$) as the time to reach the collision point calculated by dividing $d_{poc}$ by the velocity of the vehicle. Then the yielding reward function is defined as follows:
\begin{equation*}
\begin{split}
r^{e,2}_{\psi_2} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{2,0})\right)g_\mathrm{spatial}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{2,1})\right)g_\mathrm{time}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j),
\end{split}
\end{equation*}
where
\begin{equation*}
\begin{split}
g_\mathrm{spatial}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) &= \mathrm{ReLU}{\left((x_{j}-x_{poc})-h_\mathrm{d_{poc}}(\mathbf{x}^t_i, \mathbf{x}^t_j)\right)}^2,\\
g_\mathrm{time}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) &=
\mathrm{ReLU}{\left(h_\mathrm{T_{col}}(\mathbf{x}^t_i, \mathbf{x}^t_j)-(T_{col_{i}}-T_{col_{j}})\right)}^2.
\end{split}
\end{equation*}
The functions $h_\mathrm{d_{poc}}$ and $h_\mathrm{T_{col}}$ are neural networks with ReLU output activation. The $g_\mathrm{spatial}^\mathrm{NN}$ term learns a spatial aspect of the yield behavior and compares the agent's distance from the estimated collision-point with the NN-learned \textit{safe} reference within which the lane-changing maneuver can be done. The second term $g_\mathrm{time}^\mathrm{NN}$ adds a temporal aspect, by enforcing the vehicle to ensure a minimum \emph{safe} time headway. \textcolor{black}{We adopt $g_\mathrm{time}^\mathrm{NN}$ because time-to-collision is an important measure in traffic safety assessment \cite{minderhoud2001extended}}. The intuition behind it is to ensure that the vehicles do not occupy the same position at the same time.

\begin{figure}[t]
\centering
\includegraphics[height=1.1in]{poc_cropped.pdf}
\caption{Collision point diagram. At every timestep, the heading vectors of the agents can be calculated by approximating the motion as linear. The intersection between these vectors is taken to be the collision point where the agents would collide if a yield action is not taken. } \label{fig:poc_viz}
\end{figure}

{\bf Results.} For each scenario, we trained a GRI model with the recurrent policy decoder (\ref{eqn:rnn1})-(\ref{eqn:rnn4}) in Appx. \ref{app:model}. The results are summarized in Table \ref{table:ngsim}. In car-following, the NRI model still performs better on trajectory reconstruction, but the GRI model achieves comparable RMSE on the NGSIM dataset. In lane-changing, their comparison is consistent: the NRI model slightly outperforms our model in trajectory reconstruction, whereas our model dominates the NRI model in graph accuracy.

We visualize the interaction graphs in Fig. \ref{fig:ngsim_graph}. One interesting observation is that the graphs inferred by NRI have more edges in general. We want to emphasize that both models are trained under the same sparsity constraint. The results imply that we could guide the model to explore a clean and sparse representation of interactions by incorporating relevant domain knowledge, whereas the sparsity regularization itself is not sufficient to serve the purpose. Moreover, the NRI model assigns the same edge type to both edges between a pair of agents. It makes the graphs less interpretable because the vehicles ought to affect each other in different ways. On the other hand, even if different from the hypotheses, our GRI model tends to infer sparse graphs with directional edges.

\textcolor{black}{For the supervised policy, it has the lowest reconstruction error in lane-changing. It implies that the human hypothesis is reasonable because it is capable of modeling the interactions among human drivers. For the car-following case, its reconstruction error is slightly higher than NRI. Since we cannot assure that our hypothesis is the ground-truth interaction graph underlying the interacting system\textemdash in fact, as we mentioned before, we never meant to treat it as the ground-truth\textemdash it is possible that the NRI model can find a latent space that can effectively model the interactions in the unsupervised manner. However, as shown in Fig. \ref{fig:ngsim_graph}, it is difficult to interpret the graphs inferred by NRI. Considering the sparse and semantic nature of the hypothesis as well as the fact that the supervised policy's reconstruction error is on par with the NRI model, we think the chosen hypothesis is a valid one.}

\begin{table*}[t]
\centering
\caption{Performance Comparison on Naturalistic Traffic Dataset}
\label{table:ngsim}
\begin{threeparttable}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Car Following ($\Delta t=0.2s, T=30$)} & \multicolumn{4}{c|}{Lane Changing ($\Delta t=0.2s, T=40$)} \\ \cline{2-8}
& $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ & $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_y(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ \\ \hline GRI & $1.700\pm{1.005}$ & $0.721\pm{0.363}$ & $\mathbf{100.00\pm{0.00}}$ & $7.118\pm{3.647}$ & $0.764\pm{0.336}$ & ${4.320\pm2.392}$ & $\mathbf{98.55\pm{0.06}}$ \\ \hline NRI & $\mathbf{1.436\pm{0.880}}$ & $\mathbf{0.650\pm{0.328}}$ & $64.09\pm{0.08}$ & $6.532\pm{3.822}$ & $0.330\pm{0.181}$ & $\mathbf{4.291\pm2.544}$ & $28.98\pm{0.08}$ \\ \hline Supervised & ${1.482\pm{0.938}}$ & ${0.665\pm{0.344}}$ & - & $\mathbf{5.897\pm{3.651}}$ & $\mathbf{0.323\pm{0.223}}$ & $4.307\pm{2.435}$ & - \\ \hline
\end{tabular}
\begin{tablenotes}
\item[1] The data is presented in form of $\text{mean}\pm{\text{std}}$.
\end{tablenotes}
\end{threeparttable}
\end{table*}

\begin{figure}[t]
\centering
\includegraphics[width=3.1in]{ngsim_graph.pdf}
\caption{The empirical distribution of estimated edge variables $\hat{z}$ over the test dataset in the naturalistic traffic scenarios. We summarize the results in multiple adjacency matrices corresponding to different edge types. In the adjacency matrix corresponding to the $k^\mathrm{th}$ type of interaction, the element $A_{i,j}$ indicates the relative frequency of $\hat{z}_{j,i}=k$, where $\hat{z}_{j,i}$ is the latent variable for the edge from node $j$ to node $i$. } \label{fig:ngsim_graph}
\end{figure}

\subsection{\textcolor{black}{Semantic Meaning of Latent Space}} \label{sec:ood}
\textcolor{black}{The above experimental results show that our GRI model can recover the ground-truth interaction graphs in the synthetic scenarios with high accuracy, and infer interaction graphs that are consistent with human hypothesis on the NGSIM dataset. However, as we argue in Sec. \ref{sec:introduction}, accurate interaction inference alone is not sufficient to show that the model learns a semantically meaningful latent space that is consistent with human domain knowledge. Given an edge, the policy decoder should also synthesize the corresponding semantic interactive behavior indicated by its edge type. It is difficult to verify whether the policy decoder is able to synthesize semantically meaningful interaction simply by monitoring the reconstruction error. Small reconstruction error on in-distribution data could be achieved by imitating demonstration without modeling the correct interaction \cite{de2019causal, tang2021exploring}. To study the semantic meaning of latent space, we design a set of out-of-distribution tests \footnote{For clarification, the models used in this section are the same as those introduced in Sec. \ref{sec:synthetic}. We merely designed additional out-of-distribution cases for testing.} by adding increasing perturbation to the initial states. We then enforce the same edge types as in the in-distribution case, and run those different policy decoders to generate the trajectories. We are curious about whether the policy decoders can consistently synthesize the correct semantic interactive behavior under distribution shift. If so, we claim the latent space indeed possesses the semantic meaning that is consistent with human domain knowledge.}

\textcolor{black}{In the synthetic scenarios, we focus on the following relation. For both car-following and lane-changing scenes, we keep the two vehicles with the following relation, resulting in interaction graphs merely consisting of the following edges (Fig. \ref{fig:ood_scene}). We introduce perturbation by decreasing the initial longitudinal headway to values unseen during the training stage.} The initial longitudinal headway is defined as $\Delta x=x^0_1 - x^0_0$, namely the longitudinal distance from Vehicle 1 to Vehicle 0 at the first time step. During the training stage, we sampled $\Delta x$ from uniform distributions: In car-following, $\Delta x\sim \mathrm{unif}(4, 8)$; In lane-changing, $\Delta x\sim \mathrm{unif}(8, 12)$. In the out-of-distribution experiments, we gradually decreased $\Delta x$ from the lower bound to some negative value, which means Vehicle 0 is placed in front of Vehicle 1. We are curious about whether the models can generate trajectories meeting the characteristics of the car-following behavior in these unseen scenarios\textemdash scenarios with a different number of vehicles and distorted state distribution. To quantitatively evaluate whether the synthesized behavior satisfies the requirements of car-following, we consider three metrics for evaluation:

\begin{itemize}
{\color{black}
\item Success Rate:
\begin{align}
\mathrm{Success\ Rate} &= \frac{1}{N}\sum_{i=1}^N\mathbf{1}(\Delta x^f_{i} \geqslant \delta_f), \label{eqn:success_rate} \\
\textrm{where } \Delta x^f_{i} &= x^T_{1,i} - x^T_{0,i}, \nonumber
\end{align}

\item Collision Rate:
\begin{align}
\mathrm{Collision\ Rate} &= \frac{1}{N}\sum_{i=1}^N \mathbf{1}(d_{\min,i} \leqslant \delta_c), \label{eqn:minimum_distance} \\
\textrm{where } d_{\min,i} & = \min_t \sqrt{\left|x^t_{1,i} - x^t_{0,i}\right|^2 + \left|y^t_{1,i} - y^t_{0,i}\right|^2}, \nonumber
\end{align}
}

\item Lateral distance:
\begin{equation}
\Delta y = \left|y^T_1 - y^T_0\right| - \left|y^0_1 - y^0_0\right|. \label{eqn:lateral_distance}
\end{equation}

\end{itemize}

\begin{figure}[t]
\centering
\includegraphics[width=2.7in]{ood_scene.pdf}
\caption{Out-of-distribution scenarios. We removed one vehicle from the nominal scenes and shifted the initial longitudinal headway $\Delta x$ to unseen values.} \label{fig:ood_scene}
\end{figure}

We intend to quantify three typical characteristics of the following behavior with the metrics defined above: 1) staying behind the leading vehicle; 2) maintaining a substantial safe distance from the leading vehicle; 3) keeping in the same lane as the leading vehicle. \textcolor{black}{We consider the following vehicle's maneuver successful if the vehicle manages to keep a substantial positive final headway. And we consider two vehicles collide if the minimum distance between them is smaller than a safety threshold.} Lastly, we expect the following behavior to attain a negative $\Delta y$, which means the following vehicle attempts to approach the leading vehicle's lane.

All metrics were applied in the lane-changing scenario, but we only adopted $\mathrm{Success\ Rate}$ in the car-following scenario. Since we only model the longitudinal dynamics, $\Delta y$ is not applicable. For the same reason, if their initial positions are too close or the following vehicle is located ahead of the leading one initially, the following vehicle will inevitably crash into the leading vehicle, which results in $d_{\min}=0$. Therefore, we only care about the first characteristic.

The results are summarized in Fig. \ref{fig:ood_stats_cf} and Fig. \ref{fig:ood_stats_lc}, where we plot the mean values of the evaluated metrics versus $\Delta x$. \textcolor{black}{In the car-following scenario, the NRI policy fails to slow down Vehicle 0 to follow Vehicle 1 when $\Delta x$ becomes negative. In contrast, the supervised policy and GRI policy maintain high success rates with negative $\Delta x$. However, the number of failure cases starts to increase for the supervised policy when $\Delta x$ becomes substantially negative, whereas the GRI policy maintains a perfect success rate over the tested range of perturbation.} We visualize a marginal example in Fig. \ref{fig:ood}, where both the NRI policy and the supervised one fail to maintain a positive final headway.

\begin{figure}[t]
\centering
\includegraphics[width=2.8in]{ood_stats_cf.pdf}
\caption{\textcolor{black}{Results in out-of-distribution synthetic car-following scenario. We plot $\mathrm{Success\ Rate}$ versus $\Delta x$ with the error band denoting $95\%$ confidence interval of the indicator, $\mathbf{1}(\Delta x^f_i\geqslant \delta_f)$. We set $\delta_f=2\mathrm{m}$.}} \label{fig:ood_stats_cf}
\end{figure}

\textcolor{black}{In the lane-changing scenario, the GRI policy maintains a consistent perfect success rate over all tested values of $\Delta x$. For the other two models, the success rates drastically decrease with decreasing $\Delta x$.} In terms of $\Delta y$, all models tend to reduce the lateral distance between the vehicles, which is consistent with the third characteristic of the following behavior. However, we found that the GRI policy attains an average $\Delta y$ with smaller magnitude and the magnitude decreases with decreasing $\Delta x$. It implies that the GRI policy changes its strategy when the initial position of Vehicle 0 is ahead of Vehicle 1. In order to keep a proper safe distance, Vehicle 0 does not change its lane until Vehicle 1 surpasses itself. On the other hand, the lateral behavior is unchanged for the other two models. However, the vehicle cannot maintain a substantial safe distance if it changes its lane too early, \textcolor{black}{which is verified by the plot of collision rate versus $\Delta x$}. The difference in their strategies is further illustrated by the example visualized in Fig. \ref{fig:ood}.

\begin{figure}[t]
\centering
\includegraphics[width=3.0in]{ood_stats_lc.pdf}
\caption{\textcolor{black}{Results in out-of-distribution synthetic lane-changing scenario. We plot $\mathrm{Success\ Rate}$, $\mathrm{Collision\ Rate}$, and the mean value of $\Delta y$ versus $\Delta x$. The error bands denote $95\%$ confidence interval. For $\mathrm{Success\ Rate}$ and $\mathrm{Collision\ Rate}$, the error bands are of the indicator functions. We set $\delta_f=\delta_c=2\mathrm{m}$.}} \label{fig:ood_stats_lc}
\end{figure}

\begin{figure}[t]
\centering
\includegraphics[width=2.9in]{ood_stats_cf_ngsim.pdf}
\caption{\textcolor{black}{Results in out-of-distribution naturalistic traffic car-following scenario. We plot $\mathrm{Success\ Rate}$ versus $\Delta x$ with the error bands denoting $95\%$ confidence interval of the indicator, $\mathbf{1}(\Delta x^f_i\geqslant \delta_f)$. We set $\delta_f=2\mathrm{m}$.}} \label{fig:ood_stats_cf_ngsim}
\end{figure}

\begin{figure}[t]
\centering
\includegraphics[width=3.2in]{ood_stats_ngsim_lc.pdf}
\caption{\textcolor{black}{Results in out-of-distribution naturalistic traffic lane-changing scenario. We plot $\mathrm{Success\ Rate}$, $\mathrm{Collision\ Rate}$, and the mean value of $\Delta y$ versus $\Delta x$. The error bands denote $95\%$ confidence interval. For $\mathrm{Success\ Rate}$ and $\mathrm{Collision\ Rate}$, the error bands are of the indicator functions. We set $\delta_f=\delta_c=2\mathrm{m}$.}} \label{fig:ood_stats_ngsim_lc}
\end{figure}

\begin{figure*}[t]
\centering
\includegraphics[width=6.9in]{ood.pdf}
\caption{\textcolor{black}{Examples where the leading car is placed behind the following one at the initial timestep. The trajectories are visualized as sequences of rectangles. Each rectangle represents a vehicle at a specific time step. The vehicles are driving along the positive direction of the x-axis. The GRI policy still prompts the car-following behavior: It slows down the vehicle until the leading one surpasses it. Meanwhile, the NRI policy and the supervised one do not behave as $\mathcal{G}_\mathrm{interact}$ suggests.}} \label{fig:ood}
\end{figure*}

We repeat the experiment on the NGSIM datasets. Similar to the case of synthetic dataset, we remove one vehicle from each scene, resulting in an interaction graph consisting of a single edge (Fig. \ref{fig:ood_scene}). \textcolor{black}{It is worth noting that removing a vehicle from a scene alters the dynamics of the interacting system. It is not fair to expect the models to synthesize the same trajectories as in the dataset. Therefore, we do not aim to compare the generated trajectories with the ones in the dataset in this out-of-distribution test. We just check whether the generated trajectories satisfy the desired characteristics of the corresponding interactive behaviors.}

In the lane-changing case, the remaining edge has the type of yielding. According to our definition of the yielding relation, we consider the same characteristics and adopt the same metrics defined in Eqn. (\ref{eqn:success_rate})-(\ref{eqn:lateral_distance}) for evaluation. Since we do not have control over the data generation procedure, we generate out-of-distribution test samples with different levels of discrepancy by controlling the ratio of longitudinal headway change. Given a sample from the original test dataset, we generate its corresponding out-of-distribution sample by shifting its initial longitudinal headway $\Delta x$ by a certain ratio, denoted by $\delta$, resulting in a new longitudinal headway $\Delta x'$:
\begin{equation*}
\Delta x' = (1 - \delta)\Delta x.
\end{equation*}
We evaluate the models on datasets generated with different values of $\delta$. We are particularly interested in the cases when $\delta\geqslant1$, which leads to a negative initial headway. We present the results in Fig. \ref{fig:ood_stats_cf_ngsim} and Fig. \ref{fig:ood_stats_ngsim_lc}. The comparison is quite consistent with the synthetic scenarios. Compared to the other baselines, our GRI policy can synthesize trajectories that satisfy the desired semantic properties in a larger range of distribution shift.

The results suggest that even though the NRI model can accurately reconstruct the trajectories, the unsupervised latent space and the corresponding policies do not capture the semantic meanings behind the interactions. In contrast, the GRI model learns a semantically meaningful latent space which is consistent with human domain knowledge. Another useful insight we draw from the experiment is that interaction labels are not sufficient to induce an explainable model with semantic latent space. Even though the supervised policy utilizes additional information on the ground-truth interaction graph, it fails to synthesize the following behavior in novel scenarios. Although the GRI model still has a considerable gap in reconstruction performance compared to the supervised baseline, it provides a promising and principled manner to incorporate domain knowledge into a learning-based autonomous driving system and induce an explainable model.

{\color{black}
\section{Discussion and Limitation}
\label{sec:discussion}
\subsection{Application of the Semantic Latent Space}
Enabling an explainable model is a crucial step towards trustworthy human interaction. However, it is still unclear how humans may benefit from the improved explainability. We would like to have a brief discussion on the potential application of the semantic latent space introduced in GRI. When the autonomous vehicle encounters an unfamiliar situation (e.g., the out-of-distribution scenarios studied in Sec. \ref{sec:ood}), a semantic latent space gives the safety drivers or passengers the privilege to review and override the inferred interaction graph if the model misunderstands the scenario. In contrast, humans can neither understand an interaction graph nor identify the correct edge types, if the learned interactive behaviors do not have explicit semantic meaning. Such kind of safety assurance could help build up a safe and trustworthy cooperation between humans and the autonomous vehicles.

However, it is impractical to keep the users monitoring the model output in real-time. Instead, we can introduce an additional module to detect out-of-distribution scenes \cite{filos2020can, SunL-RSS-21} and use the estimated epistemic uncertainty to decide when to query the end users. In \cite{filos2020can}, the authors proposed an adaptive variant of their robust imitative planning algorithm, which incorporates such a unit. It is also a common practice for current autonomous driving companies to have human assistants for vehicles to query when encountering abnormal situations.

\subsection{Limitation of the Learning Algorithm}
In our experiments, GRI always has higher reconstruction error than NRI, especially on the synthetic dataset. One of the reasons is that reconstruction error is not directly optimized under the AIRL formulation. The objective function of NRI consists of a reconstruction loss, which essentially minimizes the Euclidean distance between the reconstructed trajectory and the ground-truth one. In other words, it directly minimizes the RMSE metrics used in our evaluation. In contrast, GRI adopts the objective function of AIRL, which also minimizes a distance between the trajectory pair. However, the distance is defined by the learned discriminator and is not necessarily equivalent to the Euclidean distance. In Appx. \ref{app:airl_ablation}, we study two AIRL baseline models on the synthetic dataset. The results suggest that none of these AIRL-based approaches achieve the same reconstruction performance as NRI.

Another reason is that the current learning algorithm is not quite stable, because of the adversarial training scheme we introduce when incorporating AIRL into the original NRI model. In typical AIRL settings, we may mitigate this problem by warmstarting the training with a policy network pretrained through imitation learning or behavior cloning \cite{finn2016guided, yu2019meta}. However, since we aim to learn a semantic latent space, warmstarting the training with a model with unsupervised latent space is not helpful. Alternatively, we may initialize the policy decoder with the supervised one. One issue is that it will change our current setting where human labels are not required. We will investigate this new setting in our future work, and develop a more stable training scheme to further optimize the performance of GRI. A stable training scheme is also a prerequisite before applying GRI to more sophisticated real-world scenarios.

The structured reward functions also interfere with the stability of the learning procedure. Compared to the variant of GRI studied in Appx. \ref{app:airl_ablation} with semantic reward functions removed, we found GRI is more sensitive to hyperparameters and prone to diverging if not carefully tuned. This is because, although the structured reward functions are differentiable, it is not guaranteed that the reward functions can be stably optimized through gradient descent. In our future work, we will explore a more stable and robust learning scheme with those structured reward functions.
}

\section{Conclusion and Future Work}
\label{sec:conclusion}
In this work, we propose Grounded Relational Inference (GRI), which models an interactive system's underlying dynamics by inferring the agents' semantic relations. By incorporating structured reward functions, we ground the relational latent space into semantically meaningful behaviors defined with expert domain knowledge. We demonstrate that GRI can model interactive traffic scenarios under both simulation and real-world settings, and generate semantic interaction graphs explaining the vehicle's behavior by their interactions.

Although we limit our experiments to the autonomous driving domain, the model itself is formulated without specifying the context. As long as proper domain knowledge is available, the proposed method can be extended naturally to other fields (e.g., human-robot interaction). However, there are several technical gaps we need to bridge before extending the current framework to more complicated traffic scenarios and interactive systems in other fields. One gap between the current model and these practical modules is graph dynamics. Throughout the paper, we assume a static interaction graph over the time horizon. We will investigate how to incorporate dynamic graph modeling into the current framework. Another gap is the cooperative assumption, which we would like to remove in the future so that the framework can be generalized to non-cooperative scenarios. Besides, as we have mentioned before, the GRI model still has a considerable gap in reconstruction performance compared to the other baselines. In future work, we will improve the model architecture and training algorithm to fill the performance gap while maintaining the advantages of GRI as an explainable model.

\section{Appendix}
\subsection{Graph Neural Network Model Details} \label{app:model}
In terms of model structure, both the encoder and the policy decoder are built based on node-to-node message-passing \cite{gilmer2017neural}, consisting of a node-to-edge message-passing and an edge-to-node message-passing:
\begin{align}
v\rightarrow e:\ \ \mathbf{h}^l_{i,j} & = f^l_e(\mathbf{h}^l_i, \mathbf{h}^l_j, \mathbf{x}_{i,j}), \label{eqn:gnn-1}\\
e\rightarrow v:\ \mathbf{h}^{l+1}_{j} & = f^l_v(\sum\nolimits_{i\in\mathcal{N}_j}\mathbf{h}^l_{i,j}, \mathbf{x}_j), \label{eqn:gnn-2}
\end{align}
where $\mathbf{h}^l_i$ is the embedded hidden state of node $v_i$ in the $l^{\rm th}$ layer and $\mathbf{h}^l_{i,j}$ is the embedded hidden state of the edge $e_{i,j}$. The features $\mathbf{x}_i$ and $\mathbf{x}_{i,j}$ are assigned to the node $v_i$ and the edge $e_{i,j}$ respectively as inputs. $\mathcal{N}_j$ denotes the set of the indices of $v_j$'s neighbouring nodes connected by an incoming edge. The functions $f^l_e$ and $f^l_v$ are neural networks for edges and nodes respectively, shared across the graph within the $l^\mathrm{th}$ layer of node-to-node message-passing.

{\bf GNN Encoder.} The GNN encoder is essentially the same as in NRI. It models the posterior distribution as $q_\phi(\mathbf{z}\vert\boldsymbol{\tau})$ with the following operations:
\begin{align*}
\mathbf{h}^1_j & = f_{\mathrm{emb}}(\mathbf{x}_j), \\
v\rightarrow e:\ \: \mathbf{h}^1_{i,j} & = f^1_e(\mathbf{h}^1_i, \mathbf{h}^1_j), \\
e\rightarrow v:\ \ \ \mathbf{h}^{2}_{j} & = f^1_v\left(\sum\nolimits_{i\neq j}\mathbf{h}^1_{i,j}\right), \\
v\rightarrow e:\ \: \mathbf{h}^2_{i,j} & = f^2_e(\mathbf{h}^2_i, \mathbf{h}^2_j), \\
q_\phi(\mathbf{z}_{i,j}\vert{\boldsymbol{\tau}}) & = \mathrm{softmax}\left(\mathbf{h}^2_{i,j}\right),
\end{align*}
where $f_e^1, f_v^1$ and $f_e^2$ are fully-connected networks (MLP) and $f_{\mathrm{emb}}$ is a 1D convolutional network (CNN) with attentive pooling.

{\bf GNN Policy Decoder.} The policy operates over $\mathcal{G}_\mathrm{interact}$ and models the distribution $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$, which can be factorized with $\pi_\eta\left(\mathbf{a}^t_j\vert{\mathbf{x}^t, \mathbf{z}}\right)$ as in Eqn. (\ref{eqn:facto}). We model $\pi_\eta$ as a Gaussian distribution with the mean value parameterized by the following GNN:
\begin{align}
v\rightarrow e:\ \ \ \ \Tilde{\mathbf{h}}^t_{i,j} & = \sum_{k=0}^{K}\mathbf{1}(z_{i,j}=k) \Tilde{f}^{k}_{e}(\mathbf{x}^t_i, \mathbf{x}^t_j), \label{eqn:policy1}\\
e\rightarrow v:\ \ \ \ \ \ \boldsymbol{\mu}_j^t & = \Tilde{f}_v\left(\sum\nolimits_{i\neq j}{\Tilde{\mathbf{h}}^t_{i,j}}\right), \label{eqn:policy2} \\
\pi_\eta\left(\mathbf{a}^t_j\vert{\mathbf{x}^t, \mathbf{z}}\right) & = \mathcal{N}(\boldsymbol{\mu}^t_j, \sigma^2\mathbf{I}) \label{eqn:policy3}.
\end{align}

Alternatively, the model capacity is improved by using a recurrent policy $\pi_\eta\left(\mathbf{a}^t_j\vert \mathbf{x}^t, \dots, \mathbf{x}^1, \mathbf{z}\right)$; namely, the agents take actions according to the historical trajectories of the system. We follow the practice in \cite{kipf2018neural} and add a GRU unit to obtain the following recurrent model:
\begin{align}
v\rightarrow e:\ \ \ \ \Tilde{\mathbf{h}}^t_{i,j} & = \sum_{k=0}^{K}\mathbf{1}(z_{i,j}=k) \Tilde{f}^{k}_{e}\left(\Tilde{\mathbf{h}}^{t}_i,\Tilde{\mathbf{h}}^{t}_j\right), \label{eqn:rnn1}\\
e\rightarrow v:\ \ \ \Tilde{\mathbf{h}}^{t+1}_j & = \mathrm{GRU}\left(\sum\nolimits_{i\neq j} \Tilde{\mathbf{h}}^t_{i,j}, \mathbf{x}^t_j, \Tilde{\mathbf{h}}^t_{j} \right), \\
\boldsymbol{\mu}_j^t & = f_\mathrm{out}\left(\Tilde{\mathbf{h}}^{t+1}_j\right), \\
\pi_\eta\left(\mathbf{a}^t_j\vert \mathbf{x}^t, \dots, \mathbf{x}^1, \mathbf{z}\right) & = \mathcal{N}(\boldsymbol{\mu}^t_j, \sigma^2\mathbf{I}), \label{eqn:rnn4}
\end{align}
where $\Tilde{\mathbf{h}}^t_i$ is the recurrent hidden state encoding the historical information up to the time step $t-1$.

{\color{black}
\subsection{Reconstruction Visualization on Synthetic Dataset}\label{app:visual}

\begin{figure*}[t]
\centering
\includegraphics[width=7.1in]{std_synthetic.png}
\caption{\textcolor{black}{Average standard deviation of states along the time horizon. (a) and (b) show the standard deviation of $x$ and $v$ in the synthetic car-following scenario. (c)-(e) show the standard deviation of $x$, $y$, and $v$ in the synthetic lane-changing scenario.}}
\label{fig:std_synthetic}
\end{figure*}

\begin{figure}[t]
\centering
\includegraphics[width=3in]{visual_lc.png}
\caption{\textcolor{black}{Visualization of the reconstructed trajectories in a lane-changing scene. (a) and (b) correspond to the trajectories of Car 1 and Car 0 respectively. We visualize the distributions of the reconstructed trajectories estimated using kernel density estimate. The ground-truth trajectories are denoted by the blue curves.}}
\label{fig:visual_lc}
\end{figure}

In our experiments, we found that GRI has significantly larger reconstruction error on the synthetic dataset than the NRI baseline. To better understand this performance gap on reconstruction, we looked into the reconstructed trajectories of both models. Instead of executing the mean value of the policy output as we did in our main experiments, we sampled the actions from the policy distribution to estimate the variance of reconstructed trajectories. In Fig. \ref{fig:std_synthetic}, we plot the average standard deviation of reconstructed states along the time horizon. We observed that the policy decoder of GRI tends to have larger variance. It partially explains the large RMSE values reported in Table \ref{table:synthetic}: the metrics were computed with a single reconstructed trajectory. The policy distribution of GRI still has larger bias than the one of NRI. We visualize the reconstructed trajectories of a lane-changing case in Fig. \ref{fig:visual_lc}. While the GRI policy induces larger variance, the distribution of the reconstructed trajectories is sensible.

\subsection{AIRL Ablation Study} \label{app:airl_ablation}
With the motivation of incorporating semantic meaning into the relational latent space, we developed GRI by introducing AIRL into relational inference and studied how the semantic reward functions may guide relational latent space learning. Meanwhile, it would be interesting to take a different perspective and study the effects of introducing relational inference and semantic reward functions into AIRL. In this section, we take the synthetic scenarios as examples and conduct an ablation study, where we compare GRI against two variants.

The first one is an AIRL variant, denoted by GRI-AIRL, which is obtained by removing relational inference and semantic reward functions from GRI. Concretely, both the policy and reward decoders operate on a fully-connected interaction graph with homogeneous edge type. And we simply use MLPs to model the reward functions in Eqn. (\ref{eqn:node_reward}) and (\ref{eqn:edge_reward}), instead of those semantic reward functions. The objective function then becomes Eqn. (\ref{eqn:opt-2}), but with neither the expectation over $\mathbf{z}$ nor the information bottleneck constraint. The second one is a variational AIRL variant, denoted by GRI-VAIRL, in which we introduce relational inference but do not use the semantic reward functions. In this case, the objective function is identical to the one in GRI, i.e., Eqn. (\ref{eqn:opt-2}).

\begin{figure}[t]
\centering
\includegraphics[width=2.5in]{graph_vairl.pdf}
\caption{\textcolor{black}{The interaction graph inferred by the GRI-VAIRL model in the synthetic lane-changing scenario.}}
\label{fig:graph_vairl}
\end{figure}

The results are summarized in Table \ref{table:ablation}. For the car-following scenario, the reconstruction performance is improved after introducing relational inference into AIRL. It is interesting that the GRI-VAIRL variant is able to recover the ground-truth interaction graph, even without the semantic reward functions. It makes sense because the car-following scenario only consists of a single non-trivial edge type. It is plausible for the model to distinguish non-interaction edges from the others, because null reward is enforced for non-interaction edges. In some senses, we may still consider the reward function semantic\textemdash it incorporates the semantic meaning of non-interaction into the latent space. However, we cannot guarantee that GRI-VAIRL can distinguish between different non-trivial interactive behaviors, which is verified by the lane-changing case. Fig. \ref{fig:graph_vairl} shows the inferred interaction graph. The model only adopts a single non-trivial edge type to describe all the interactive behaviors. Compared to the ground-truth graph, the inferred graph has an additional edge $z_{2,1}$ but ignores the edge $z_{1,0}$. Ignoring the edge $z_{1,0}$ limits the modeling capacity of the policy decoder, which could possibly explain why GRI-VAIRL has larger $\mathrm{RMSE}_x$ and $\mathrm{RMSE}_v$ than GRI-AIRL in the lane-changing case.

In summary, we could improve reconstruction performance by introducing relational inference into AIRL. Even if GRI-VAIRL has larger reconstruction error in the lane-changing case due to the biased inferred graph, we still observe that GRI-VAIRL converges faster. The learning process becomes more stable and less sensitive to different hyperparameters. We think it is because the model may identify those agents that are not interacting with each other, preventing the reward decoder from fitting a reward function unifying both interactive and non-interactive behaviors. Meanwhile, it is still necessary to incorporate semantic reward functions to differentiate different interactive behaviors and induce a semantically meaningful interaction graph. However, semantic latent space comes at a cost of reconstruction performance. The structured reward functions limit the modeling capacity of the reward decoder. Also, although the structured reward functions are differentiable, it is not guaranteed that they can be well optimized through gradient descent. As a result, they may interfere with the stability of the learning procedure.

\begin{table*}[t]
\centering
\caption{\textcolor{black}{Ablation Study on Synthetic Dataset}}
\label{table:ablation}
\begin{threeparttable}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Car Following ($\Delta t=0.2s$, $T=20$)} & \multicolumn{4}{c|}{Lane Changing ($\Delta t=0.2s$, $T=30$)} \\ \cline{2-8}
& $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ & $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_y(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ \\ \hline GRI & $0.241\pm{0.125}$ & $0.174\pm{0.068}$ & $\mathbf{100.00\pm{0.00}}$ & $0.529\pm{0.230}$ & $0.207\pm{0.046}$ & $0.303\pm{0.128}$ & $\mathbf{99.95\pm{0.01}}$ \\ \hline GRI-VAIRL & $\mathbf{{0.120\pm{0.054}}}$ & $\mathbf{{0.116\pm{0.039}}}$ & $\mathbf{100.00\pm{0.00}}$ & ${0.377\pm{0.201}}$ & $\mathbf{{0.160\pm{0.038}}}$ & ${0.190\pm{0.058}}$ & $50.0\pm{0.00}$ \\ \hline GRI-AIRL & $0.138\pm{0.068}$ & $0.150\pm{0.043}$ & - & $\mathbf{0.304\pm{0.321}}$ & $0.198\pm{0.065}$ & $\mathbf{0.173\pm{0.101}}$ & -\\ \hline
\end{tabular}
\begin{tablenotes}
\item[1] The data is presented in form of $\text{mean}\pm{\text{std}}$.
\end{tablenotes}
\end{threeparttable}
\end{table*}
}
\title{ Real Paley-Wiener theorems \\ for the Dunkl transform on $I\!\!R^d$}

\section { Introduction}
\hspace*{5mm} In the last few years there has been a great interest in real Paley-Wiener theorems for certain integral transforms; see \cite{Ta} for an overview, references and details on this question.\\ \hspace*{5mm} In this paper we consider the Dunkl operators $T_j, j=1,...,d$, which are the differential-difference operators introduced by C.F.Dunkl in
\cite{D1}. These operators are very important in pure Mathematics and in Physics. They provide a useful tool in the study of special functions with root systems (see \cite{D2}.)
\\ \hspace*{5mm}C.F.Dunkl in \cite{D3} (see also \cite{J}) has studied a Fourier transform ${\cal F}_{D}$, called Dunkl transform defined for a regular function $f$ by $$\forall \, x \in I\!\!R^d, \; {\cal F}_D f(x) = \displaystyle\int_{I\!\!R^d}K(-ix,y) f(y)\omega_k(y)dy, $$ where $K(-ix,y)$
represents the Dunkl kernel and $\omega_k $ a weight function.\\
\hspace*{5mm} The main purpose of this paper is to prove real Paley-Wiener theorems on the Schwartz space ${\cal S}(I\!\!R^d)$ and on $L^2_k(I\!\!R^d)$. More precisely we consider first the Paley-Wiener spaces associated with the Dunkl operators:
$$\begin{array}{ccc}
PW_{k}^2(I\!\!R^d) & = & \{f \in {\cal E}(I\!\!R^d)/\forall\, n\, \in\, I\!\!N,\;
\, \triangle_k^n f \in L^2_k(I\!\!R^d) \,
\mbox{and} \, R_f^{\triangle_k} = \displaystyle\lim_{n\to \infty}
||\triangle_k^n f||_{k,2}^{\frac{1}{2n}} < +\infty\} \\
PW_{k}(I\!\!R^d) & = & \{f \in {\cal E}(I\!\!R^d)/\forall\, n,m\, \in\, I\!\!N, \,
\; (1+||x||)^m \triangle_k^n f \in L^2_k(I\!\!R^d) \,
\; \mbox{and} \, R_f^{\triangle_k} < +\infty\},
\end{array}$$
where $ {\cal E}(I\!\!R^d) $ is the space of $C^\infty$-functions on
$I\!\!R^d$, $\triangle_k = \displaystyle\sum_{j=1}^d T_j^2$ the Dunkl-Laplacian operator, $L^2_k(I\!\!R^d)$ the space of square integrable functions with respect to the measure $\omega_k(x) dx$
and $||.||_{k,2}$ the norm of the space $L^2_k(I\!\!R^d)$.\\ We establish that ${\cal F}_D$ is a bijection from $PW_{k}^2(I\!\!R^d)$
onto $L^2_{k,c}(I\!\!R^d)$(the space of functions in $L^2_k(I\!\!R^d)$
with compact support), and from $PW_{k}(I\!\!R^d)$ onto $D(I\!\!R^d)$(the space of $C^\infty$-functions on $I\!\!R^d$ with compact support).\\
\hspace*{5mm}Next, we characterize the $L^2_k(U)$-functions by their Dunkl transform, where $U$ is respectively a disc, a symmetric body, a nonconvex and an unbounded domain in $I\!\!R^d$. These results are the real Paley-Wiener theorems for square integrable functions with respect to the measure $\omega_k(x)dx$.\\
\hspace*{5mm}We generalize also a theorem of H.H.Bang \cite{B} by characterizing the support of the Dunkl transform of functions in
$ {\cal S}(I\!\!R^d) $ by an
$L^p$ growth condition. More precisely these real Paley-Wiener theorems can be stated as follows:\\ $\bullet $ The Dunkl transform ${\cal F}_D(f)$ of $f \in {\cal S}(I\!\!R^d)$ vanishes outside a polynomial domain $U_P = \{x \in I\!\!R^d, \; P(x) \leq 1\}$,
with $P$ a non constant polynomial, if and only if $$\limsup_{n
\to +\infty}||P^n (iT)f||_{k,p} \leq 1, \; 1 \leq p \leq \infty,
$$ with $T = (T_1,...,T_d)$ and $||.||_{k,p}$ is the norm of the space $L^p_k(I\!\!R^d)$ of $p^{th}$ integrable functions on $I\!\!R^d$
with respect to the measure $\omega_k(x)dx$.
\\ $\bullet$ A function $f \in {\cal S}(I\!\!R^d)$ is the Dunkl transform of a function vanishing in some ball with radius $r$
centered at the origin, if and only if $$ \lim_{n \to \infty}
||\displaystyle\sum_{m = 0}^{\infty} \frac{(n\triangle_k)^m\, f
}{m!}||_{k,p}^{\frac{1}{n}}\leq \exp(-r^2), \; 1 \leq p \leq
\infty. $$ \hspace*{5mm} This paper is arranged as follows:\\
\hspace*{5mm}In the second section we recall the main results about the harmonic analysis associated with the Dunkl operators.\\
\hspace*{5mm} The third section is devoted to the study of functions whose Dunkl transform has compact support, and to establishing the real Paley-Wiener theorems for ${\cal F}_D$ on the Schwartz space ${\cal S}(I\!\!R^d)$.\\
\hspace*{5mm} In the fourth section we characterize the functions in ${\cal S}(I\!\!R^d)$ such that their Dunkl transform vanishes outside a polynomial domain.\\ \hspace*{5mm} In the fifth section we give a necessary and sufficient condition for functions in
$L^2_k(I\!\!R^d)$ such that their Dunkl transform vanishes in a disc.
\\ \hspace*{5mm} We study in the sixth section the functions such that their Dunkl transform satisfies the symmetric body property, and we derive a real Paley-Wiener type theorem for these functions.\\

\section{Harmonic analysis associated with the Dunkl operators.}
\hspace*{5mm} In the first two subsections we collect some notations and results on Dunkl operators, the Dunkl kernel and the Dunkl intertwining operators (see [6],[7],[8]).
\subsection { Reflection groups, root system and multiplicity functions} \hspace*{5mm}We consider $I\!\!R^d$ with the euclidean scalar product $<.,.>$ and $||x||=\sqrt{\langle x,x\rangle}$. On
${I\!\!\!\!C}^{d},\;||.||$ denotes also the standard Hermitian norm\ while\ for\ all\ $z=(z_{1},\;...,\;z_{d}%
),\;w=(w_{1},\;...,\;w_{d})\in{I\!\!\!\!C}^{d},$%
\[
<z,w>=\displaystyle\sum_{j=1}^{d}z_{j}\overline{w}_{j}.
\]
For $\alpha\in I\!\!R^d\backslash\{0\}$, let $\sigma_{\alpha}$ be the reflection in the hyperplane $H_{\alpha}\subset I\!\!R^d$ orthogonal to
$\alpha$, i.e.
\begin{equation}
\sigma_{\alpha}(x)=x-2\frac{\langle\alpha,x\rangle}{||\alpha||^{2}}\alpha.
\label{2.1}%
\end{equation}
A finite set $R\subset I\!\!R^d\backslash\{0\}$ is called a root system if $R\cap I\!\!R.\alpha=\{\alpha,-\alpha\}$ and $\sigma_{\alpha}R=R$
for all $\alpha\in R$. For a given root system $R$ the reflections
$\sigma _{\alpha},\alpha\in R$, generate a finite group $W\subset O(d)$, the reflection group associated with $R$. We denote by $|W|$
its cardinality. All reflections in $W$ correspond to suitable pairs of roots. For a given $\beta \in I\!\!R^d \backslash \bigcup_{\alpha\in R}H_{\alpha}$, we fix the positive subsystem
$R_{+}=\{\alpha\in R\;/\langle\alpha,\beta\rangle>0\}$,
then for each $\alpha\in R,$ either $\alpha\in R_{+}$ or $-\alpha\in R_{+}%
$.\newline A function $k:R\longrightarrow{I\!\!\!\!C}$ on a root system
$R$ is called a multiplicity function if it is invariant under the action of the associated reflection group $W$. If one regards $k$
as a function on the corresponding reflections, this means that k is constant on the conjugacy classes of reflections in $W$. For abbreviation, we introduce the index
\begin{equation}
\gamma=\gamma(k)=\displaystyle\sum_{\alpha\in R_{+}}k(\alpha). \label{2.2}%
\end{equation}
Moreover, $\omega_{k}$ denotes the weight function
\begin{equation}
\omega_{k}(x)=\prod_{\alpha\in R_{+}}|\langle\alpha,x\rangle|^{2k(\alpha)},
\label{2.3}%
\end{equation}
which is$\;W-$invariant and homogeneous of degree
$2\gamma$.\newline We introduce the Mehta-type constant
\begin{equation}
c_{k}=(\int_{I\!\!R^{d}}\exp(-||x||^{2})\omega_{k}(x)\;dx)^{-1}, \label{2.4}%
\end{equation}

\noindent{\bf{Remark }}\\ \hspace*{5mm} For $d=1$ and
$W=\mathbf{Z}_{2}$, the multiplicity function $k$ is a single parameter denoted $\gamma>0$ and we have $$
\forall\,x\in I\!\!R,\;\omega_{k}(x)=|x|^{2\gamma}. $$

\subsection{ Dunkl operators- The Dunkl kernel and the Dunkl intertwining operator}

\noindent{\bf{Notations}}. We denote by \\
- $C(I\!\!R^{d}) (resp \;C_{c}
(I\!\!R^{d}))$\, the space of continuous functions on $I\!\!R^{d}$ (resp.
with compact support).\\
- $C^{p}(I\!\!R^{d}) (resp \;C^{p}_{c}
(I\!\!R^{d}))$\, the space of functions of class $C^p$ on $I\!\!R^{d}$
(resp. with compact support).\\
- $ {\cal E}(I\!\!R^{d})$ the space of
$C^{\infty}$-functions on $I\!\!R^{d}$.\\ - $ {C}^\infty_0(I\!\!R^{d})$
the space of $C^{\infty}$-functions on $I\!\!R^{d}$ which vanish at the infinity.
\\ - ${\cal S}(I\!\!R^{d})$ the space of $C^{\infty}$-functions on $I\!\!R^{d}$ which are rapidly decreasing as their derivatives.\\ - $D(I\!\!R^{d})$ the space of
$C^{\infty}$-functions on $I\!\!R^{d}$ which are of compact support.\\
We provide these spaces with the classical topology .\\\\
We consider also the following spaces\\ - ${\cal E'}(I\!\!R^{d})$ the space of distributions on $I\!\!R^{d}$ with compact support. It is the topological dual of ${\cal E}(I\!\!R^{d})$.\\ - ${\cal S'}(I\!\!R^{d})$
the space of tempered distributions on $I\!\!R^{d}$. It is the topological dual of ${\cal S}(I\!\!R^{d})$.\\

The Dunkl operators $T_{j},\; j\; = 1\;, ...,\; d $, on $I\!\!R^{d}$
associated with the finite reflection group W and the multiplicity function k are given by
\begin{equation}
T_{j} f(x) = \frac{\partial}{\partial x_{j}} f(x) +
\displaystyle\sum_{\alpha \in R_{+}}k(\alpha) \alpha_{j}
\frac{f(x) - f(\sigma_{\alpha}(x))}{<\alpha,x>},\quad f \; \in \;
C^{1}(I\!\!R^{d}). \label{h9}
\end{equation}
In the case $k = 0$, the $T_{j}, \, j = 1, ... , d,$ reduce to the corresponding partial derivatives. In this paper, we will assume throughout that $k \geq 0$ and $\gamma > 0$.\\ \hspace*{5mm} The Dunkl Laplacian $\triangle_{k}$ on $I\!\!R^{d}$ is defined by
\begin{equation}
\triangle_{k}f = \displaystyle\sum_{j = 1}^{d}T_{j}^{2}f =
\triangle f + 2 \displaystyle\sum_{\alpha \in R_{+}} k(\alpha)
\delta_{\alpha} (f), \quad f \in C^{2}(I\!\!R^{d}), \label{h12}
\end{equation}
where $\triangle = \displaystyle\sum_{j = 1}^{d} \partial_{j}^{2}$
the Laplacian on $I\!\!R^{d}$ and
$$ \delta_{\alpha}(f)(x) = \frac{<\nabla f(x),\alpha>}{<\alpha,x>} -
\frac{ f(x) - f(\sigma_{\alpha}(x))}{<\alpha,x>^{2}},$$ with
$\nabla f$ the gradient of f.\\ \hspace*{5mm} For $f $ in $
C_{c}^{1}(I\!\!R^{d})$ and $g$ in $C^{1}(I\!\!R^{d})$ we have
\begin{equation}
\int_{I\!\!R^{d}} T_{j}f(x) g(x)\omega_{k}(x)\;dx = - \int_{I\!\!R^{d}}
f(x) T_{j}g(x)\omega_{k}(x)\;dx, \, j = 1, ..., d.
\label{hh6}
\end{equation}

For $y \in I\!\!R^{d} $, the system $$ \left\{
\begin{array}{crll}
T_{j}u(x,y) &=& y_{j} u(x,y),& j = 1, ..., d,\\\\ u(0,y) &=& 1,
&for\, all \; y \in \,I\!\!R^{d}.
\end{array}
\right. $$ admits a unique analytic solution on $I\!\!R^{d}$, denoted by $K(x,y)$ and called Dunkl kernel. This kernel has a unique holomorphic extension to ${I\!\!\!\!C}^{d} \times {I\!\!\!\!C}^{d}$.\\
\noindent{\bf{Example. }}\\ \hspace*{5mm} If $d = 1$ and $W = \mathbf{Z}_{2}$, the Dunkl kernel is given by
\begin{equation}
K(z,w) = j_{\gamma - \frac{1}{2}}(izw) + \frac{zw}{2 \gamma + 1}
j_{\gamma + \frac{1}{2}}(izw), \quad z, \; w \in I\!\!\!\!C,
\label{h18}
\end{equation}
where for $\alpha \geq \frac{-1}{2}$, $j_{\alpha}$ is the normalized Bessel function of index $\alpha$
defined by
\begin{equation}
j_{\alpha}(z) = 2^{\alpha} \Gamma(\alpha + 1)
\frac{J_{\alpha}(z)}{z^{\alpha}} = \Gamma(\alpha + 1)
\displaystyle\sum_{n = 0}^{\infty}\frac{(-1)^{n}(\frac{z}{2})^{2 n} } {n! \Gamma(\alpha + 1 + n)}
\end{equation}
with $J_{\alpha}$ is the Bessel function of first kind and index
$\alpha$.\\\\
\hspace*{5mm} The Dunkl kernel possesses the following properties

\begin{Prop}\hspace*{-2mm}.i) For all $z, w \in I\!\!\!\!C^{d}$ we have.
\begin{equation}
K(z,w) = K(w,z) \quad ; K(z,0) = 1 \quad and\quad K(\lambda z,w) = K(z, \lambda w),\, for\; all \; \lambda \in I\!\!\!\!C.
\label{h20}
\end{equation}
\hspace*{5mm} ii) For all $\nu \in I\!\!N^{d}, x \in I\!\!R^{d}$ and $z \in I\!\!\!\!C^{d}$, we have
\begin{equation}
|D_{z}^{\nu} K(x , z)| \leq ||x||^{|\nu|} \,\exp(||x||\, ||\mathrm{Re}\, z||),
\label{h21}
\end{equation}
and for all $x, y \in I\!\!R^{d}$ :
\begin{equation}
|K(i x , y)| \leq 1, \label{h23}
\end{equation}
with $D_{z}^{\nu} = \frac{\partial^{\nu}}{\partial z_{1}^{\nu_1}...\partial z_{d}^{\nu_d}}$ and $|\nu| = \nu_1 + ...
+ \nu_d.$\\ \hspace*{5mm} iii) For all $x, y \in I\!\!R^{d}$ and $g
\in W$ we have
\begin{equation}
K(-i x , y) = \overline{K(i x , y)}, \quad and \quad K(g x , g y)
= K( x , y).
\end{equation}
\label{P1.2}
\hspace*{5mm}$i\nu)$ The function $K(x,z)$ admits for all $x \in I\!\!R^{d}$ and $z \in I\!\!\!\!C^{d}$ the following Laplace type integral representation
\begin{equation}
K(x,z) = \displaystyle\int_{I\!\!R^d} e^{<y,z>} d\mu_{x}(y), \label{753}
\end{equation}
where $\mu_{x}$ is a probability measure on $I\!\!R^d$, with support in the closed ball $B(o, ||x||)$ of center o and radius
$||x||$.(See [11]).\end{Prop}

The Dunkl intertwining operator $V_k$ is defined on $C(I\!\!R^{d})$ by
\begin{equation}
\forall x \in I\!\!R^{d}, \quad V_k f(x) = \displaystyle\int_{I\!\!R^d}
f(y)d\mu_{x}(y), \label{str}
\end{equation}
where $\mu_{x}$ is the measure given by the relation (\ref{753}).
\\ The operator $V_k$ satisfies the following properties
\\ \hspace*{5mm}i)We have $$ \forall x \in I\!\!R^{d}, \; \; \forall z \in I\!\!\!\!C^{d}, \;
\; K(x,z) = V_k (e^{<.,z>})(x). $$ \hspace*{5mm} ii)The operator
$V_k$ is a topological isomorphism from ${\cal E}(I\!\!R^{d})$ onto itself satisfying the transmutation relation
\begin{equation}
\forall x \in I\!\!R^{d}, \quad T_j { V}_k (f)(x) = {V}_k
(\displaystyle\frac{\partial}{\partial y_j}f)(x), \quad j = 1, ... , d,
f \in {\cal E}(I\!\!R^d).
\label{9}
\end{equation}
\hspace*{3mm}iii) For each $x \in I\!\!R^d$ there exists a unique distribution $\eta_x$ in ${\cal E'}(I\!\!R^d)$ with support in the ball $B(o, ||x||)$, such that for all $f$ in ${\cal E}(I\!\!R^d)$ we have
\begin{equation}
V_k^{-1}f(x) = <\eta_x, f>. \label{10}
\end{equation}
(See [16]).
\subsection{ The Dunkl transform}
\noindent{\bf{Notations}}. We denote by $L_{k}^{p}(I\!\!R^{d})$ the space of measurable functions on $I\!\!R^{d}$ such that $$
\begin{array}{crl}
||f||_{k,p}& =& (\displaystyle \int_{I\!\!R^{d}} |f(x)|^{p}
\omega_{k}(x) \;dx)^{\frac{1}{p}} < +\infty,
\quad if \; 1 \leq p
< + \infty,\\\\ ||f||_{k,\infty}& = & ess\; sup _{x \in I\!\!R^{d} }
|f(x)| < +\infty.
\end{array}
$$

\hspace*{5mm}The Dunkl transform of a function f in $D(I\!\!R^{d})$ is given by
\begin{equation}
\forall y \in I\!\!R^{d}, \quad {\cal F}_{D}(f) (y) =
\displaystyle\int_{I\!\!R^{d}}f(x) K(-iy,x) \omega_{k}(x)dx . \label{13}
\end{equation}
We give in the following some properties of this transform. (See
[7][8]).
\\\\ \hspace*{5mm} i) For all f in $L_{k}^{1}(I\!\!R^{d})$ we have
\begin{equation}
||{\cal F}_{D} (f)|| _{k, \infty} \leq ||f||_{k, 1}. \label{h26}
\end{equation}
\hspace*{5mm} ii) For all $f$ in ${\cal S}(I\!\!R^{d})$ we have
\begin{equation}
\forall y \in I\!\!R^{d}, \quad {\cal F}_{D}( T_j f)( y ) = i y_j
{\cal F}_{D}( f ) (y) \quad , j = 1, ..., d. \label{h28}
\end{equation}
\hspace*{5mm} $iii)$ For all f in $L_{k}^{1}(I\!\!R^{d})$ such that
${\cal F}_{D}(f)$ is in $L_{k}^{1}(I\!\!R^{d})$, we have the inversion formula
\begin{equation}
f(y) = \frac{c_{k}^{2}}{4^{\gamma + \frac{d}{2}}}\displaystyle
\int_{I\!\!R^{d}} {\cal F}_{D}(f)(x)
K(i x , y) \omega_{k}(x)\; dx
, \quad a.e. \label{h31}
\end{equation}
\begin{Th}\hspace*{-2mm}. The Dunkl transform ${\cal F}_{D}$
is a topological isomorphism. \\ \hspace*{5,5mm} i) From ${\cal S}(I\!\!R^{d})$ onto itself. \\ \hspace*{5,5mm} ii) From $D(I\!\!R^{d})$
onto $ {H}(I\!\!\!\!C^{d})$ (the space of entire functions on $I\!\!\!\!C^{d}$,
rapidly decreasing and of exponential type.) \\\noindent The inverse transform ${\cal F}_{D}^{- 1}$ is given by
\begin{equation}
\forall y \in I\!\!R^{d}, \quad {\cal F}_{D}^{-1}(f)(y) = \frac{c_{k}^{2}}{4^{\gamma + \frac{d}{2}}}
{\cal F}_{D}(f)(-y), \quad f \in {\cal S}(I\!\!R^{d}). \label{h32}
\end{equation}
\end{Th}
\begin{Th} \hspace*{-2mm}. i) Plancherel formula for ${\cal F}_D$ .\\ For all f in ${\cal S}(I\!\!R^{d})$ we have
\begin{equation}
\displaystyle \int_{I\!\!R^{d}} |f(x)|^{2}
\omega_{k}(x)\; dx = \displaystyle \frac {c_{k}^{2}} {4^{\gamma + \frac{d}{2} } }\displaystyle \int_{I\!\!R^{d}}
| {\cal F}_{D}(f)(\xi)|^{2} \omega_{k}(\xi)\; d\xi. \label{h33}
\end{equation}
\hspace*{5mm} ii) Plancherel theorem for ${\cal F}_{D}$.\\The renormalized Dunkl transform $f \to 2^{-( \gamma + \frac{d}{2})}
c_{k} {\cal F}_{D}(f)$ can be uniquely extended to an isometric isomorphism on $L_{k}^{2}(I\!\!R^{d})$. \label{Tp}\end{Th}
\begin{Prop}\hspace*{-2mm}.
Let $1 \leq p \leq 2$. The Dunkl transform ${\cal F}_{D}$ can be extended to a continuous mapping from $L_{k}^{p}(I\!\!R^{d})$ into
$L_{k}^{q}(I\!\!R^{d}),$ with $q$ the conjugate exponent of $p$.
\label{cher}
\end{Prop}
\begin{Def}\hspace*{-2mm}. i) The Dunkl transform of a distribution $\tau$ in ${\cal S}'(I\!\!R^{d})$ is defined by $$ <
{\cal F}_{D}(\tau), \phi > = < \tau,{\cal F}_{D }(\phi)>, \quad
\phi \in {\cal S}(I\!\!R^{d}). $$ \hspace*{5mm} ii) We define the Dunkl transform of a distribution $\tau$ in ${\cal E'}(I\!\!R^d)$ by
$$ \forall \, y \in I\!\!R^d, \; {\cal F}_{D}(\tau)(y) = \langle
\tau_x, K(-ix,y) \rangle.$$
\end{Def}
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_{D}$
is a topological isomorphism. \\ \hspace*{5,5mm} i) From ${\cal S'}(I\!\!R^{d})$ onto itself. \\ \hspace*{5,5mm}ii) From ${\cal E}'(I\!\!R^{d})$ onto ${\cal H}(I\!\!\!\!C^{d})$(the space of entire functions on $I\!\!\!\!C^{d}$, slowly increasing and of exponential type.)
\end{Th}
\hspace*{5mm}Let $\tau$ be in ${\cal S'}(I\!\!R^d)$. We define the distribution $T_j \tau$, $j=1,...,d,$ by $$<T_j \tau, \psi> = -
<\tau, T_j \psi>, \; \mbox{ for\, all} \; \psi \, \in \; {\cal S}(I\!\!R^d).$$ This distribution satisfies the following properties
\begin{eqnarray}
{\cal F}_{D}(T_j \tau) &=& i y_j {\cal F}_{D}( \tau),\quad j = 1,
..., d. \label{sol1}
\\
{\cal F}_{D}(\triangle_k \tau) &=& -|| y||^2 {\cal F}_{D}( \tau).
\label{sol2}
\end{eqnarray}
\hspace*{5mm}We consider $f$ in $L^2_k(I\!\!R^d)$.We define the distribution $T_f$ in ${\cal S'}(I\!\!R^d)$ by $$\langle T_f,\varphi
\rangle = \displaystyle\int_{I\!\!R^d}f(x)\varphi(x)\omega_k(x)dx, \; \varphi \in
{\cal S}(I\!\!R^d).$$ In the following $T_f$ will be denoted by
$f$.
\begin{Prop}\hspace*{-2mm}. Let $f$ be in $L^2_k(I\!\!R^d)$. Then we have
\begin{equation}\label{ppppp}
{\cal F}_{D}(\triangle_k f) = -||x||^2 {\cal F}_{D}(f).
\end{equation}
\end{Prop}
\noindent{\bf{Proof}}\\\hspace*{5mm} For all $\varphi \in {\cal S}(I\!\!R^d)$ we have $$\langle \triangle_k f,\varphi\rangle =
\langle f,\triangle_k \varphi\rangle = \displaystyle\int_{I\!\!R^d}f(x)\triangle_k
\varphi(x)\omega_k(x)dx.$$ But $$\begin{array}{lll}
\langle {\cal F}_{D}(\triangle_k f),\varphi\rangle & = & \langle \triangle_k f,
{\cal F}_{D}(\varphi)\rangle = \langle f,\triangle_k {\cal F}_{D}(\varphi)\rangle\\
& = & \displaystyle\int_{I\!\!R^d}f(y){\cal F}_{D}(-||x||^2 \varphi(.)) (y) \omega_k(y)dy\\
& = & -\displaystyle\int_{I\!\!R^d}{\cal F}_{D}(f)(x)||x||^2 \varphi(x)
\omega_k(x)dx \\ &=& \langle -|| x||^2 {\cal F}_{D}(f),\varphi\rangle.
\end{array}$$
Thus $${\cal F}_{D}(\triangle_k f) = -||x||^2 {\cal F}_{D}(f).$$
\noindent{\bf{Notations.}} We denote by\\
\hspace*{5mm} - $L^2_{k,c}(I\!\!R^d)$ the space of functions in $L^2_{k}(I\!\!R^d)$
with compact support.\\
\hspace*{5mm} - ${\cal H}_{L^2_k}(I\!\!\!\!C^d)$ the space of entire functions $f$
on $I\!\!\!\!C^d$ of exponential type such that $f_{|I\!\!R^d}$ belongs to $L^2_{k}(I\!\!R^d)$.
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is bijective from
$L^2_{k,c}(I\!\!R^d)$ onto ${\cal H}_{L^2_k}(I\!\!\!\!C^d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm}i) We consider the function
$f$ on $I\!\!\!\!C^d$ given by \begin{equation} \forall \, z \in I\!\!\!\!C^d, \;
f(z) = \displaystyle\int_{I\!\!R^d}g(x) K(-ix,z)
\omega_k(x)dx,\label{159753}\end{equation} with $g \in L^2_{k,c}(I\!\!R^d)$.\\ By differentiation under the integral sign and by using the inequality (11), we deduce that the function
$f$ is entire on $I\!\!\!\!C^d$ and of exponential type.\\ On the other hand the relation (\ref{159753}) can also be written in the form $$ \forall \; y \in I\!\!R^d, \; f(y) = {\cal F}_{D}(g)(y).$$ Thus from Theorem \ref{Tp} the function
$f_{|I\!\!R^d}$ belongs to $L^2_{k}(I\!\!R^d)$. Thus $f \in {\cal H}_{L^2_k}(I\!\!\!\!C^d)$.
\\ \hspace*{5mm}ii) Reciprocally let $\psi $ be in ${\cal H}_{L^2_k}(I\!\!\!\!C^d)$. From Theorem 2.6 ii) there exists $S \in {\cal E'}(I\!\!R^d)$ with support in the ball $B(o,a)$ of center $o$ and radius $a$, such that
\begin{equation}\label{tgvam}
\forall \, y \in I\!\!R^d, \; \psi(y) = \langle S_x,
K(-ix,y)\rangle.
\end{equation}
On the other hand as $\psi_{|I\!\!R^d}$ belongs to $L^2_{k}(I\!\!R^d)$,
then from Theorem \ref{Tp} there exists \linebreak $h \in L^2_{k}(I\!\!R^d)$ such that
\begin{equation}\label{tgvam1}
\psi_{|I\!\!R^d} = {\cal F}_{D}(h).
\end{equation}
Thus from (\ref{tgvam}), for all $\varphi \in D(I\!\!R^d)$ we have
$$\begin{array}{lll}
\displaystyle\int_{I\!\!R^d}\psi(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy & = &
\langle S_x,\displaystyle\int_{I\!\!R^d}
K(-ix,y)\overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy\rangle.
\end{array}$$
Thus using (22) we deduce that
\begin{equation}\label{hhhhh}
\displaystyle\int_{I\!\!R^d}\psi(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy = \frac{4^{\gamma +
\frac{d}{2}}}{c_k^2}
\langle S,\varphi\rangle.
\end{equation}
On the other hand (\ref{tgvam1}) implies $$\displaystyle\int_{I\!\!R^d}\psi(y)
\overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy =
\displaystyle\int_{I\!\!R^d}{\cal F}_{D}(h)(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy.$$ But from Theorem 2.2 we deduce that
\begin{equation}\label{uuuuu}\begin{array}{lll}
\displaystyle\int_{I\!\!R^d}{\cal F}_{D}(h)(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy &=& \frac{4^{\gamma +
\frac{d}{2}}}{c_k^2}\displaystyle\int_{I\!\!R^d}h(y) \varphi(y)\omega_k(y)dy
\nonumber\\ &=& \frac{4^{\gamma + \frac{d}{2}}}{c_k^2}\langle T_{h\omega_k},\varphi\rangle.
\end{array}
\end{equation}
Thus the relations (\ref{hhhhh}),(\ref{uuuuu}) imply $$S =
T_{h\omega_k}.$$ This relation shows that the support of $h$ is compact. Then $h \in L^2_{k,c}(I\!\!R^d)$.

\subsection{ The Dunkl\
translation operator and the Dunkl convolution product}

\begin{Def}\hspace*{-2mm}. Let $y \, \in I\!\!R^{d}$. The Dunkl translation operator $f \mapsto \tau_y f$ is defined on ${\cal S}(I\!\!R^d)$ by
\begin{equation} \forall \, x \in I\!\!R^d, \;
{\cal F}_D (\tau_{y}f)(x)= K(-ix,y){\cal F}_D (f)(x). %
\label{2.38}%
\end{equation}
\end{Def}
\noindent{\bf{Example}} \\ \hspace*{5mm}Let $t > 0$, we have
\begin{equation}\label{tgsz}\forall \, x \in \, I\!\!R^d,
\;\tau_{x}(e^{-t||\xi||^2})(y) = \frac{M_k}{t^{\gamma +
\frac{d}{2}}}
K(\frac{x}{\sqrt{2t}},\frac{y}{\sqrt{2t}})
e^{-\frac{||x||^2 + ||y||^2}{4t}},
\end{equation}
with $M_k = (2^{\gamma+\frac{d}{2}}c_k)^{-1}$.\\
\noindent{\bf{Remark}}
\\ \hspace*{5mm} The operator $\tau_y$, $y
\in I\!\!R^d$, can also be defined on ${\cal E}(I\!\!R^d)$ by
\begin{equation}
\forall \, x \in I\!\!R^d, \;\tau_{y}f(x)= (V_k)_x (V_k)_y[(V_k)^{-1}(f)(x+y)]. %
\label{2.389}%
\end{equation}
(See \cite{T5}).\\ \hspace*{5mm} At the moment an explicit formula for the Dunkl translation operator is known only in the following two cases. \\
\underline{1$^{st}$ case}: $d = 1$ and $W = {\bf Z}_2$. \\ For all $f
\in C(I\!\!R)$ we have $$\begin{array}{ccc}
\forall \, x \in I\!\!R, \tau_{y}f(x) & = & \frac{1}{2}\displaystyle\int_{-1}^{1}f(\sqrt{x^2 + y^2 -2xyt})
(1+\frac{x-y}{\sqrt{x^2 + y^2 -2xyt}})\Phi_k(t)dt\\
& + & \frac{1}{2}\displaystyle\int_{-1}^{1}f(-\sqrt{x^2 + y^2 -2xyt})
(1-\frac{x-y}{\sqrt{x^2 + y^2 -2xyt}})\Phi_k(t)dt,
\end{array}$$ where $$\Phi_k(t) = \frac{\Gamma(k+\frac{1}{2})}{\sqrt{\pi}%
\Gamma(k)} (1+t)(1-t^2)^{k-1}.$$ Moreover for all $f \in L^p_k(I\!\!R)$, $1 \leq p \leq \infty$, we have
$$ ||\tau_{y}f||_{k,p} \leq 3 ||f||_{k,p}, \quad 1 \leq p \leq \infty. $$ (See [10][13]).\\ \underline{2$^{nd}$ case}: For all $f \in {\cal E}(I\!\!R^d)$ radial we have $$ \forall \, x \in
I\!\!R^d, \; \tau_{y}f(x) = V_k [f_0 (\sqrt{||x||^2 + ||y||^2 +2
\langle x,.\rangle })](y),$$ with $f_0$ the function on
$[0,+\infty[$ given by $f(x) = f_0(||x||)$. \\ Moreover for all $f
\in L^p_k(I\!\!R^d)$, $1 \leq p \leq \infty$, we have
$$ ||\tau_{y}f||_{k,p} \leq ||f||_{k,p}, \quad 1 \leq p \leq \infty. $$ (See [11][13]).\\ \hspace*{5mm}Using the Dunkl translation operator, we define the Dunkl convolution product of functions as follows (See [11][17]).
\begin{Def}\hspace*{-2mm}. For $f,g$ in $D(I\!\!R^d)$, we define the Dunkl convolution product by%
\begin{equation}
\forall \, x \in I\!\!R^d, \; f*_{D}g(x)=\int_{I\!\!R^d}\tau_{x}f(-y)g(y)d\omega_{k}(y).\label{2.42}%
\end{equation}
\end{Def}
This convolution is commutative and associative and satisfies the following properties. (See [13]).\\
\begin{equation}\hspace*{-97mm}i) {\cal F}_D (f*_{D}g) = {\cal F}_D (f){\cal F}_D (g).\end{equation}
\hspace*{5mm}ii) Let $1\leq p,q,r\leq+\infty,\;$such that $\frac{1}%
{p}+\frac{1}{q}-\frac{1}{r}=1.\;$If $f\;$is in $L^{p}_{k}(I\!\!R^d)$
radial and $g$ an element of $L^{q}_{k}(I\!\!R^d),\;$ then $f*_{D}g\;$
belongs to $L^{r}_{k}(I\!\!R^d)\;$ and we have
\begin{equation}
\left\| f*_{D}g\right\| _{r,k}\leq \left\| f\right\| _{p,k}
\left\| g\right\| _{q,k}.\label{2.43999}%
\end{equation}
\hspace*{5mm}iii) Let $d = 1$ and $W = {\bf Z}_2$. For all
$f\;$ in $L^{p}_{k}(I\!\!R)$ and $g\;$ an element of
$L^{q}_{k}(I\!\!R)$, the function $f*_{D}g$ belongs to
$L^{r}_{k}(I\!\!R)\;$ with $\frac{1}%
{p}+\frac{1}{q}-\frac{1}{r}=1.\;$ and we have
\begin{equation}
\left\| f*_{D}g\right\| _{r,k}\leq 3\left\| f\right\| _{p,k}
\left\| g\right\| _{q,k}.\label{2.43}%
\end{equation}

\section{Functions with compact spectrum } \hspace*{5mm} First we recall that the spectrum of a function is the support of its Dunkl transform. \\We begin this section by the following definition.
\begin{Def}\hspace*{-2mm}.
i) We define the support of $ g \in L^2_k(I\!\!R^d)$, denoted by
$\mbox{supp }\, g$,
as the smallest closed set outside which the function $g$
vanishes almost everywhere. \\ \hspace*{5mm}ii) We denote by $$R_g = \displaystyle \sup_{ \lambda \in supp \, g}
||\lambda||,$$ the radius of the support of $g$.\\
\noindent{{\bf{ Remark}}}\\ \hspace*{5mm} It is clear that $R_g$
is finite if and only if, $g$ has compact support.
\end{Def}
\begin{Prop}\hspace*{-2mm}. Let $g \in L^2_k(I\!\!R^d)$ such that for all $n \in I\!\!N$, the function
$||\lambda||^{2n}g(\lambda)$ belongs to $ L^2_k(I\!\!R^d)$. Then
\begin{equation}\label{g}
R_g = \lim_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}.
\end{equation}
\end{Prop}
\noindent{\bf{Proof}}\\ \hspace*{5mm} We suppose that
$||g||_{k,2} \neq 0$, otherwise $R_g = 0$ and formula (\ref{g})
is trivial.\\ \hspace*{5mm}Assume now that $g$ has compact support with $R_g
> 0$. Then $$ \left\{\displaystyle\int_{I\!\!R^d} ||\lambda||^{4n} |g(\lambda)|^2
\omega_k(\lambda) d\lambda\right\}^{\frac{1}{4n}} \leq
\left\{\displaystyle\int_{||\lambda|| \leq R_g}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}R_g.$$ Thus we deduce that
$$\limsup_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \leq
\limsup_{n \to \infty}\left\{\displaystyle\int_{||\lambda|| \leq R_g}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}R_g = R_g.$$ On the other hand, for any positive
$\varepsilon$ we have
$$\displaystyle\int_{R_g - \varepsilon \leq ||\lambda|| \leq R_g}|g(\lambda)|^2
\omega_k(\lambda)d\lambda > 0.$$ Hence
$$\liminf_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq
\liminf_{n \to \infty}\left\{\displaystyle\int_{R_g - \varepsilon \leq ||\lambda|| \leq R_g}
||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq R_g -
\varepsilon.$$
Thus
$$R_g = \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}.$$
\hspace*{5mm} We prove now the assertion in the case where $g$
has unbounded support. Indeed, for any positive $N$, we have
$$\displaystyle\int_{ ||\lambda|| \geq N}|g(\lambda)|^2
\omega_k(\lambda)d\lambda > 0.$$ Thus
$$\liminf_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq
\liminf_{n \to \infty}\left\{\displaystyle\int_{ ||\lambda|| \geq N}
||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq N.$$
This implies that $$\liminf_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} = \infty.$$
\noindent{\bf{Notations.}} We denote by\\
\hspace*{5mm} - $L^2_{k,R}(I\!\!R^d) := \{g \in L^2_{k,c}(I\!\!R^d) / R_g =
R\}$, for $R \geq 0$.\\ \hspace*{5mm} - $D_{R}(I\!\!R^d) := \{g \in D(I\!\!R^d) / R_g =
R\}$, for $R \geq 0$.
\begin{Def}\hspace*{-2mm}.
We define the Paley-Wiener spaces $PW^2_k(I\!\!R^d)$ and $PW^2_{k,R}(I\!\!R^d)$ as follows\\
i) $PW^2_k(I\!\!R^d)$ is the space of functions $f \in {\cal E}(I\!\!R^d)$ satisfying\\
\hspace*{5mm} a) $\triangle_k^n f \in L^2_{k}(I\!\!R^d)$ for all
$n \in I\!\!N$.\\ \hspace*{5mm} b) $R_f^{\triangle_k} := \displaystyle \lim_{n \to \infty}
||\triangle_k ^n f||_{k,2}^{\frac{1}{2n}} < \infty.$\\
ii) $PW^2_{k,R}(I\!\!R^d) := \{f \in PW^2_k(I\!\!R^d) / R_f^{\triangle_k}
= R\}$.
\end{Def}

The real $L^2$-Paley-Wiener theorem for the Dunkl transform can be formulated as follows
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is a bijection \\
\hspace*{5mm}i) from $PW^2_{k,R}(I\!\!R^d)$ onto
$L^2_{k,R}(I\!\!R^d)$.\\ \hspace*{5mm} ii) from $PW^2_k(I\!\!R^d)$
onto $L^2_{k,c}(I\!\!R^d)$.\\
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} i) Let $g \in PW^2_{k,R}(I\!\!R^d)$. Then from Proposition 2.7 the function ${\cal F}_D(\triangle_k^n g)(\xi) = (-1)^n ||\xi||^{2n}{\cal F}_D(g)(\xi)
$ belongs to $ L^2_{k}(I\!\!R^d)$ for all $n \in I\!\!N$. On the other hand from Theorem 2.3 we deduce that $$ \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\xi||^{4n}|{\cal F}_D(g)(\xi)|^2
\omega_k(\xi)d\xi\right\}^{\frac{1}{4n}} = \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}|\triangle_k^n g(x)|^2
\omega_k(x)dx\right\}^{\frac{1}{4n}} = R.$$
Thus using Proposition 3.2 we conclude that ${\cal F}_D(g)$ has compact support with $ R_{{\cal F}_D(g)} = R$.\\ \hspace*{5mm}
Conversely let $f \in L^2_{k,R}(I\!\!R^d)$. Then $||\xi||^n f(\xi) \in L^1_k(I\!\!R^d)$ for any $n \in I\!\!N$, and ${\cal F}_D^{-1} f \in {\cal E}(I\!\!R^d)$. On the other hand from Theorem 2.3 we have $$ \lim_{n
\to \infty}\left\{\displaystyle\int_{I\!\!R^d}|\triangle_k^n ({\cal F}_D^{-1}
f)(x)|^2
\omega_k(x)dx\right\}^{\frac{1}{4n}} = \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\xi||^{4n}|f(\xi)|^2
\omega_k(\xi)d\xi\right\}^{\frac{1}{4n}} = R.$$
Thus ${\cal F}_D^{-1}
(f) \in PW^2_{k,R}(I\!\!R^d)$.\\ \hspace*{5mm}ii) We deduce ii) from i).
\begin{Cor}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is a bijection from $PW^2_{k}(I\!\!R^d)$ onto
${\cal H}_{L^2_k}(I\!\!\!\!C^d)$.
\end{Cor}
\noindent{\bf{Proof}}\\ \hspace*{5mm} We deduce the result from Theorem 3.4 ii) and Theorem 2.8.
\begin{Def}\hspace*{-2mm}.
i) The Paley-Wiener space $PW_k(I\!\!R^d)$ is the space of functions $f \in {\cal E}(I\!\!R^d)$ satisfying\\
\hspace*{5mm} a) $(1+||x||)^m \triangle_k^n f \in L^2_{k}(I\!\!R^d)$ for all
$n$,$m$ $\in$ $I\!\!N$.\\ \hspace*{5mm} b) $R_f^{\triangle_k} := \lim_{n \to \infty}
||\triangle_k ^n f||_{k,2}^{\frac{1}{2n}} < \infty.$\\
\hspace*{5mm} ii) We have $PW_{k,R}(I\!\!R^d) := \{f \in PW_k(I\!\!R^d) /
R_f^{\triangle_k} = R\}$, for $R \geq 0$.
\end{Def}
\noindent{{\bf{Remark}}}\\ \hspace*{5mm}
We notice that the only difference between $PW_k^2(I\!\!R^d)$ and
$PW_k(I\!\!R^d)$ is the extra requirement of polynomial decay to help ensure that ${\cal F}_D (f) \in {\cal E}(I\!\!R^d)$.\\

The real Paley-Wiener theorem for the Dunkl transform of functions in the preceding spaces is the following
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is a bijection \\ \hspace*{5mm}i) from $PW_{k,R}(I\!\!R^d)$ onto
$D_{R}(I\!\!R^d)$.\\\hspace*{5mm}ii) from $PW_k(I\!\!R^d)$
onto $D(I\!\!R^d)$.\\
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} i)Let $g \in PW_{k,R}(I\!\!R^d)
\subset PW^2_{k,R}(I\!\!R^d)$. Then ${\cal F}_D (g) \in {\cal E}(I\!\!R^d)$ since $g$ has polynomial decay, and by Theorem 3.4 the function ${\cal F}_D (g)$ has compact support with $R_{{\cal F}_D
(g)} = R$.\\ \hspace*{5mm} Conversely Let $f \in D_R (I\!\!R^d)$, then
${\cal F}_D^{-1}(f) \in {\cal S}(I\!\!R^d)$ and ${\cal F}_D^{-1}(f)
\in PW^2_{k,R}(I\!\!R^d)$ by Theorem 3.4.\\ \hspace*{5mm}ii) We deduce the result from the i).
\section{Dunkl transform of functions, with polynomial domain support} Let $P(x)$ be a non-constant polynomial.
\begin{Th}\hspace*{-2mm}. For any function $f \in {\cal S}(I\!\!R^d)$ the following relation holds
\begin{equation}\label{aze}
\lim_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} = \sup_{y \in supp {\cal F}_D(f)}|P(y)|,
\;1
\leq p \leq \infty,
\end{equation}
with $T = (T_1,...,T_d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} We consider $ f \neq 0$ in
${\cal S}(I\!\!R^d)$. Set $q = \frac{p}{p-1}$ if $1 < p < \infty$ and
$q = 1$ or $\infty$ if $p = \infty$ or $1$.\\ The proof is divided in several steps.\\ In the following three steps we suppose that
\begin{equation}\label{wahid}
0 < \sup_{y \in supp \, {\cal F}_D(f)}|P(y)| < \infty.\end{equation}
\\
{\bf{\underline{First step}}}: In this step we shall prove that
$$\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp {\cal F}_D(f)}|P(y)|, \; 1 \leq p \leq \infty.$$
$\bullet$ Let $2
\leq p < \infty$.
Applying Proposition \ref{cher}
we obtain
\begin{eqnarray}\label{l15}
||P(iT)^n f||_{k,p} &\leq& C ||P(\xi)^n {\cal F}_D(f)||_{k,q},\\
\\
& \leq & C (\sup_{y \in supp {\cal F}_D(f)}|P(y)|)^n || {\cal F}_D(f)
||_{k,q}. \nonumber
\end{eqnarray}
Thus
\begin{equation}\label{l16}
\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp {\cal F}_D(f)}|P(y)|.
\end{equation}

\noindent$\bullet$ Suppose now that $1 \leq p < 2$.
H\"older's inequality gives
\begin{equation}\label{sour}
||f||_{k,p}^{p} =
\displaystyle\int_{I\!\!R^d}(1+||x||^{2})^{-rp}|(1+||x||^{2})^{r}f(x)|^{p}\omega_k(x)dx
\leq
||(1+||x||^{2})^{r}f||_{k,2}^{p}||(1+||x||^{2})^{-rp}||_{k,\frac{2}{2-p}}.
\end{equation}
$$ \leq C ||(1+||x||^{2})^{r}f||_{k,2}^{p}, $$ for $r > 2\gamma
+d$.\\ Thus, from Proposition 2.7 we obtain $$ ||f||_{k,p}^{p}
\leq C ||(I - \triangle_k )^{r}[{\cal F}_D(f)]||_{k,2}^{p}.$$\\
Consequently for all $n \in I\!\!N$, we deduce that
\begin{equation}\label{kopm}
||P^n (iT) f||_{k,p} \leq C^{\frac{1}{p}}||(I - \triangle_k
)^{r}[P^n (\xi){\cal F}_D(f)]||_{k,2}.
\end{equation}
On the other hand, from Proposition 5.1 of [9] we have the following relation:

For all $\mu \in I\!\!N^d\backslash\{0\}$ there exist: $t_p ^0, t_p ^1
\in [0,1]$,$p=1,...,|\mu|-1$, such that for all $u \in {\cal E}(I\!\!R^d)$ we have
\begin{eqnarray}\label{65}
T^{\mu} u(x) & = & D^{\mu} u(x) + \displaystyle\sum_{\alpha \in R_+
}\{\displaystyle\sum_{|\beta|=|\mu|}\displaystyle\sum_{p=1}^{|\mu|-1}
Q_{\mu}(t_1 ^0,...,t_p ^0) D^{\beta}u\big(x - S_{\mu}(t_1 ^0
,...,t_p ^0 )<\alpha,x>\alpha\big) \nonumber
\\
& + & \displaystyle\sum_{|\beta'|=|\mu|}
P_{\mu}(t_1 ^1,...,t_{|\mu|-1}^1)
D^{\beta'}u\big(x - \widetilde{S}_{\mu}(t_1 ^1
,...,t_{|\mu|-1}^1)<\alpha,x>\alpha\big)
\},
\end{eqnarray} where
$Q_{\mu}(t_1,...,t_p),S_{\mu}(t_1,...,t_p)$, $p=1,...,|\mu|$ and
$P_{\mu}(t_1,...,t_{|\mu|-1}),\widetilde{S}_{\mu}(t_1,...,t_{|\mu|-1})$
are polynomials of degree at most $|\mu|$ with respect to each variable.\\ From this relation and by induction one can show that
\begin{equation}\label{htfv}
||(I - \triangle_k )^{r}[P^n (\xi){\cal F}_D(f)(\xi)]||_{k,2} \leq C n^{2r} ||P^{n-2r}(\xi)\varphi_n(\xi)||_{k,2}, \; n > 2r,
\end{equation}
with $supp \, \varphi_n \subset supp \,{\cal F}_D(f)$ and
$||\varphi_n||_{k,2} \leq C_1 $, where $C_1$ is a constant independent of $n$.\\ Hence, from the previous inequalities we deduce that
\begin{eqnarray}\label{nbvc}\nonumber
||P^n (iT) f||_{k,p} &\leq& C^{\frac{1}{p}}n^{2r}
||P^{n-2r}(\xi)\varphi_n(\xi)||_{k,2} \leq C^{\frac{1}{p}}n^{2r}
\sup_{y \in supp {\cal F}_D(f)}|P(y)|^{n-2r}
||\varphi_n(\xi)||_{k,2}\\ \nonumber\\ &\leq& C^{\frac{1}{p}}C_1 n^{2r}\sup_{y \in supp {\cal F}_D(f)}|P(y)|^{n-2r} .
\end{eqnarray}
Thus
\begin{equation}\label{lon}
\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp {\cal F}_D(f)}|P(y)|.
\end{equation}
$\bullet$ Let now $p = \infty$. From the relation (22) we have $$
||f||_{\infty,k} \leq \frac{c_k^2}{4^{\gamma+ \frac{d}{2}}} ||{\cal F}_D(f)||_{k,1}.
$$
On the other hand, from the Cauchy-Schwarz inequality we obtain
$$
||{\cal F}_D(f)||_{k,1} \leq C_0 ||(1+||\xi||^2)^{\frac{2\gamma+d}{2}}{\cal F}_D(f)(\xi)||_{k,2},
$$
where $C_0$ is a positive constant.\\
Combining the previous inequalities and replacing $f$ by $P(iT)^n f$, we deduce that there exists a positive constant $C$ such that
\begin{equation}\label{hgfd}
||P(iT)^n f||_{k,\infty} \leq C ||P^n (\xi)(1+||\xi||^2)^{\frac{2\gamma+d}{2}}{\cal F}_D(f)(\xi)||_{k,2}.
\end{equation}
Consequently,
\begin{equation}\label{szaq}
\limsup_{n \to \infty} ||P(iT)^n f||_{k,\infty}^{\frac{1}{n}} \leq
\sup_{y \in supp \, (1+||\xi||^2)^{\frac{2\gamma+d}{2}}{\cal F}_D(f)}|P(y)| = \sup_{y \in supp \, {\cal F}_D(f)}|P(y)|.
\end{equation}
Thus from (44), (50) and (52) we have
\begin{equation}\label{pagyz}
\limsup_{n \to \infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)|, \; 1 \leq p \leq \infty.
\end{equation}
{\bf{ \underline{Second step}}}: In this step we want to prove that $$ \lim_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} =
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)|.$$
For any $\varepsilon$, $0 < \varepsilon < \sup_{y \in supp \, {\cal F}_D(f)}|P(y)|$, there exists a point $x_0 \in supp \, {\cal F}_D(f)$ such that
$$
|P(x_0)| > \sup_{y \in supp \, {\cal F}_D(f)}|P(y)| - \frac{\varepsilon}{2}
$$ As $P$ is a continuous function, there exists a neighborhood
$U_{x_0}$ such that $$
|P(x)| > \sup_{y \in supp \, {\cal F}_D(f)}|P(y)| - \varepsilon, \; x \in U_{x_0}
$$ From Theorem 2.3 we deduce that $$
\begin{array}{lll}
||P(iT)^n f||_{k,2} &=& \frac{c_k}{2^{\gamma+
\frac{d}{2}}}||P(\xi)^n {\cal F}_D(f)||_{k,2} \\ &\geq&
\frac{c_k}{2^{\gamma+ \frac{d}{2}}} ||P(\xi)^n {\cal F}_D(f)1_{U_{x_0}}||_{k,2},\end{array} $$ where $1_{U_{x_0}}$ is the characteristic function of $U_{x_0}$.\\ Thus $$ ||P(iT)^n f||_{k,2} \geq \frac{c_k}{2^{\gamma+ \frac{d}{2}}}(\sup_{y \in supp {\cal F}_D(f)}|P(y)| - \varepsilon)^n || {\cal F}_D(f) 1_{U_{x_0}}||_{k,2}$$
This inequality implies,
\begin{equation}\label{usaki}
\liminf_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} \geq
(\sup_{y \in supp {\cal F}_D(f)}|P(y)| - \varepsilon) \lim_{n \to
\infty}|| {\cal F}_D(f) 1_{U_{x_0}}||_{k,2}^{\frac{1}{n}} =
\sup_{y \in supp \, {\cal F}_D(f)}(|P(y)| - \varepsilon).
\end{equation}
But $\varepsilon$ can be chosen arbitrarily small, thus from
(\ref{pagyz}) and (\ref{usaki}) the relation (40) follows for
$p = 2$.\\ \noindent{\bf{ \underline{Third step}}}: In this step we shall prove that $$ \liminf_{n \to \infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \geq
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)|, \;1 \leq p \leq \infty.$$
Since $f \in {\cal S}(I\!\!R^d)$, the iteration of the relation (7)
implies the relation
\begin{equation}\label{bhjt}
\displaystyle\int_{I\!\!R^d} \overline{P^n (-iT) f(x)} P^n (iT) f(x)\omega_k(x)dx
= \displaystyle\int_{I\!\!R^d}\overline{f(x)} P^{2n} (iT) f(x)\omega_k(x)dx.
\end{equation}
Hence, by H\"older's inequality,
\begin{equation}\label{kkkki}
||P^n (iT) f||_{k,2}^2 \leq ||f||_{k,q}||P^{2n} (iT) f||_{k,p}.
\end{equation}
Consequently
\begin{equation}\label{kkkki1}
\lim_{n \to \infty}||P^n (iT) f||_{k,2}^{\frac{1}{n}} \leq
(\lim_{n \to \infty}||f||_{k,q}^{\frac{1}{2n}})\liminf_{n \to
\infty}||P^{2n} (iT) f||_{k,p}^{\frac{1}{2n}} = \liminf_{n \to
\infty}||P^{2n} (iT) f||_{k,p}^{\frac{1}{2n}}.
\end{equation}
Applying now the relation (40) with $p = 2$, we conclude that
\begin{equation}\label{ijc}
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)| = \lim_{n \to \infty}||P^n (iT)
f||_{k,2}^{\frac{1}{n}}\leq \liminf_{n \to
\infty}||P^{2n} (iT) f||_{k,p}^{\frac{1}{2n}}.
\end{equation}
We replace in formula (\ref{kkkki}) the function $f$ by $P(iT) f$ and we obtain
\begin{equation}\label{pluyt}
||P^{n+1} (iT) f||_{k,2}^2 \leq ||P(iT)f||_{k,q}||P^{2n+1} (iT)
f||_{k,p}.
\end{equation}
Thus
\begin{equation}\label{tfgr}
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)| = \lim_{n \to \infty}||P^{n+1} (iT)
f||_{k,2}^{\frac{1}{n+1}}\leq \liminf_{n \to
\infty}||P^{2n+1} (iT) f||_{k,p}^{\frac{1}{2n+1}}.
\end{equation}
Using (\ref{ijc}) and (\ref{tfgr}) we deduce that
\begin{equation}\label{yhwa}
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)| \leq \liminf_{n \to
\infty}||P^{n} (iT) f||_{k,p}^{\frac{1}{n}}.
\end{equation}
Then formulas (\ref{yhwa}) and (53) give (40). Thus we have proved the theorem under the condition (\ref{wahid}).\\
\noindent{\bf{ \underline{Fourth step}}}: Suppose now $\sup_{y \in supp {\cal F}_D(f)}|P(y)| = +\infty.$ Then for any $N
> 0$ there exists a point $x_0 \in supp {\cal F}_D(f)$ such that
$|P(x_0)| \geq 2N$. Since $P$ is a continuous function there exists a neighborhood $U_{x_0}$ of $x_0$ on which $|P(x)| > N$.
Similarly to the calculation in the second step, we obtain
$$
\begin{array}{lll} \liminf_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} &\geq&
\liminf_{n \to \infty}||P^n(\xi) {\cal F}_D(f)1_{U_{x_0}}||_{k,2}^{\frac{1}{n}} ,\\
\\
& \geq & N \liminf_{n \to \infty}|| {\cal F}_D(f) 1_{U_{x_0}}||_{k,2}^{\frac{1}{n}} = N. \end{array}$$ We choose $N$
large, we obtain $$\lim_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} = \infty. $$ Finally if $\sup_{y \in supp
{\cal F}_D(f)}|P(y)| = 0$ the identity (40) is clear for $p =
2$.\\ Hence the proof of the theorem is finished.
\begin{Def}\hspace*{-2mm}.
Let $P$ be a non-constant polynomial and $U_P = \{x \in I\!\!R^d, \,
|P(x)| \leq 1 \}$. The set $U_P$ is called a polynomial domain in
$I\!\!R^d$.
\end{Def}
\noindent{\bf{Remark}} \\ \hspace*{5mm} A disc is a polynomial domain. A polynomial domain may be unbounded and nonconvex, for example $U = \{x \in I\!\!R^d, \, |x_1... x_d| \leq 1 \}$.\\

We have the following result.
\begin{Cor}\hspace*{-2mm}. Let $f \in {\cal S}(I\!\!R^d)$.
The Dunkl transform ${\cal F}_D(f)$ vanishes outside a polynomial domain $U_P$, if and only if,
\begin{equation}\label{l14}
\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq 1, \; 1
\leq p \leq \infty.
\end{equation}
\end{Cor}
\noindent{\bf{Remark}} \\ \hspace*{5mm}i) If we take $P(y) = -
||y||^2$, then $P(iT) = \triangle_k$, and Theorem 4.1 and Corollary 4.3 characterize functions such that the support of their Dunkl transform is a ball.\\ \hspace*{5mm}ii) Theorem 4.1 and Corollary 4.3 generalize also the result obtained in [3].
\section{ Dunkl transform of functions vanishing on a Ball} The following theorem gives the radius of the largest disc on which the Dunkl transform of a function in $L^2_{k}(I\!\!R^d)$ vanishes almost everywhere.
\begin{Th}\hspace*{-2mm}.
Let $f \in L^2_{k}(I\!\!R^d)$. We consider the sequence
\begin{equation}\label{l}
f_n(x) = E_n *_D f(x), \; x \in I\!\!R^d, \, n \in I\!\!N
\backslash\{0\}.
\end{equation}
where $$ E_n(y) = \frac{c_k}{(4n)^{\gamma +
\frac{d}{2}}}e^{-\frac{||y||^2}{4n}}$$
Then
\begin{equation}\label{ll}
\lim_{n \to \infty}\sqrt{-\frac{1}{n}\ln ||f_n||_{k,2}} =
\lambda_{{\cal F}_D(f)},
\end{equation}
where
\begin{equation}\label{lll}
\lambda_{{\cal F}_D(f)} = \displaystyle\inf
\displaystyle\left\{||\xi||, \; \xi \in supp {\cal F}_D(f)\right\}.
\end{equation}\label{TP}
\end{Th}
\noindent{\bf{Remark}}\\ \hspace*{5mm} The function $E_n$ is the Gauss kernel associated with Dunkl operators. From [11] p. 2424,
we have
\begin{equation}\label{zzzzz}
\forall \, x \in I\!\!R^d, \; {\cal F}_D (E_n)(x) = e^{-n||x||^2}.
\end{equation}

\noindent{\bf{Proof of Theorem \ref{TP}}}\\ \hspace*{5mm} First we remark that from (37) the function $f_n$ is well defined.
We assume that $||f||_{k,2}
> 0
$, otherwise the result is trivial. To prove (\ref{ll}) it is sufficient to verify the equivalent identity
\begin{equation}\label{llll}
\lim_{n \to \infty} ||f_n||_{k,2}^{\frac{1}{n}} = \exp( -
\lambda_{{\cal F}_D(f)}^2).
\end{equation}
Using (\ref{zzzzz}) and (37) we deduce that the Dunkl transform of
$f_n(x)$ is $ \exp(-n||\xi||^2){\cal F}_D(f)(\xi)$. Then by applying Theorem 2.3 we obtain
\begin{eqnarray}\label{lllll}
||f_n||_{k,2} &=&
\frac{c_k}{2^{\gamma+\frac{d}{2}}}||\exp(-n||\xi||^2){\cal F}_D(f)(\xi)||_{k,2} \\ &=&
\frac{c_k}{2^{\gamma+\frac{d}{2}}}||f||_{k,2} \{\displaystyle\int_{supp {\cal F}_D(f)}\exp(-2n||\xi||^2)\displaystyle\frac{|{\cal F}_D(f)(\xi)|^2}{||f||_{k,2}^2}
\omega_k(\xi)d\xi\}^{\frac{1}{2}}.\nonumber
\end{eqnarray}
On the other hand it is known that if $m$ is the Lebesgue measure on $I\!\!R^d$ and $U$ a subset of $I\!\!R^d$ such that $m(U) = 1$, then for all $\phi$ in the Lebesgue space $L^p(U,dm)$, $1 \leq p \leq
+\infty$, we have
\begin{equation}\label{l6}
\lim_{p\to \infty}||\phi||_{L^p(U;dm)} =
||\phi||_{L^\infty(U;dm)}.
\end{equation}
By applying formula (\ref{l6}) with $$U = supp {\cal F}_D(f),
\;\phi = \exp(-||\xi||^2), \; p = 2n, \; \mbox{ and} \;dm(\xi) =
\displaystyle\frac{|{\cal F}_D(f)(\xi)|^2}{||f||_{k,2}^2}\omega_k(\xi)d\xi,$$
and using the fact that $\lim_{n \to +\infty} (\frac{c_k
||f||_{k,2} }{2^{\gamma+\frac{d}{2}}})^{\frac{1}{n}} = 1$.\\ We obtain
\begin{equation}\label{l7}
\lim_{n \to \infty}||f_n||_{k,2}^{\frac{1}{n}} = \sup_{\xi \in supp {\cal F}_D(f)}\exp(-||\xi||^2) = \exp( - \lambda_{{\cal F}_D(f)}^2).
\end{equation}
Which is the relation (\ref{llll}).\\

\hspace*{5mm} The Dunkl transform of a function $f \in L^2_{k}(I\!\!R^d)$ vanishes in a neighborhood of the origin,
if and only if, $\lambda_{{\cal F}_D(f)} > 0$, or equivalently, if and only if the limit (\ref{llll}) is less than $1$. Thus we have proved the following result.
\begin{Cor}\hspace*{-2mm}.\label{har}
The condition
\begin{equation}\label{l8}
\lim_{n \to \infty} ||f_n||_{k,2}^{\frac{1}{n}} < 1,
\end{equation}
is necessary and sufficient for a function $f \in L^2_{k}(I\!\!R^d)$
to have its Dunkl transform vanishing in a neighborhood of the origin.

\end{Cor}
\noindent{\bf{Remark}}\\ \hspace*{5mm} From Theorem 3.3 and Corollary \ref{har} it follows that the support of the Dunkl transform of a function in $L^2_{k}(I\!\!R^d)$ is in the annulus
$\lambda_{{\cal F}_D(f)} \leq ||\xi|| \leq R_{{\cal F}_D(f)}$, if and only if,
\begin{equation}\label{l9}
\lambda_{{\cal F}_D(f)} \leq \lim_{n \to
\infty}\sqrt{-\frac{1}{n}\ln ||f_n||_{k,2}}\leq \lim_{n \to
\infty} ||\triangle_k^n f||_{k,2}^{{\frac{1}{2n}}} \leq R_{{\cal F}_D(f)}.
\end{equation}
\begin{Th}\hspace*{-2mm}. For any function $f \in {\cal S}(I\!\!R^d)$ the following relation holds
\begin{equation}\label{pknfr}
\lim_{n \to \infty} ||\displaystyle\sum_{m = 0}^{\infty}
\frac{(n\triangle_k)^m\, f }{m!}||_{k,p}^{\frac{1}{n}} = \exp( -
\lambda_{{\cal F}_D(f)}^2), \; 1 \leq p \leq \infty.
\end{equation}
In particular, a function $f \in {\cal S}(I\!\!R^d)$ is the Dunkl transform of a function in ${\cal S}(I\!\!R^d)$ vanishing in the ball
$B(o,r)$ of center $o$ and radius $r$, if and only if we have
\begin{equation}\label{pknfr45}
\lim_{n \to \infty} ||\displaystyle\sum_{m = 0}^{\infty}
\frac{(n\triangle_k)^m\, f }{m!}||_{k,p}^{\frac{1}{n}}\leq
\exp(-r^2), \; 1 \leq p \leq \infty.
\end{equation}
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} A similar proof to that of Theorem 4.1, gives the result.
\section{Dunkl transform of functions, vanishing outside a symmetric body }
\hspace*{5mm} A subset $K$ of $I\!\!R^d$ is called a symmetric body if $-x \in K$ for all $x \in K$. The set $K^* := \{y \in I\!\!R^d, \;
\langle x,y\rangle \leq 1 \; for \, all \, x \in K \}$ is called the polar set of $K$.
We now state another real Paley-Wiener theorem.
\begin{Th}\hspace*{-2mm}.
A function $f \in {\cal E}(I\!\!R^d)$ is the Dunkl transform of a function in $L^2_{k}(I\!\!R^d)$ vanishing outside a symmetric body
$K$, if and only if, $T^\mu f$ belongs to $L^2_{k}(I\!\!R^d)$ for all
$\mu = (\mu_1,...,\mu_d) \in I\!\!N^d$, and for all $n \in I\!\!N$ we have
\begin{equation}\label{l10}
\sup_{a \in K^*}||(\langle a,T\rangle)^n f||_{k,2} \leq
||f||_{k,2},
\end{equation}
where $T = (T_1,...,T_d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} Let $f \in {\cal E}(I\!\!R^d)$
and assume $f \neq 0$, otherwise the result is clear. We suppose that
$ {\cal F}_D(f)$, which belongs to $ L^2_{k}(I\!\!R^d)$, vanishes outside a symmetric body $K$. Then $f$ is infinitely differentiable and belongs to $L^2_{k}(I\!\!R^d)$ together with $T^\mu f$ for all
$\mu = (\mu_1,...,\mu_d) \in I\!\!N^d$. As the Dunkl transform of
$(i\langle a,\xi\rangle)^n {\cal F}_D(f)(-\xi)$ is $(\langle a,T\rangle)^n f$, then by applying Theorem 2.3, we obtain
\begin{equation}\label{l11}
||(\langle a,T\rangle)^n f||_{k,2} =
\frac{c_k}{2^{\gamma+\frac{d}{2}}}||(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)||_{k,2}.
\end{equation}
As $K$ satisfies the symmetric property, we deduce that $|\langle a,\xi\rangle| \leq 1$ for all $\xi \in K$ and $a \in K^*$. Hence
$$\begin{array}{lll} ||(\langle a,\xi\rangle)^n {\cal F}_D(f)(.)||_{k,2}^2 &=& \displaystyle\int_{K}|(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi \\ &\leq& \displaystyle\int_{K}|{\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi =
\frac{4^{\gamma+\frac{d}{2}}}{c_k^2} ||f||_{k,2}^2.\end{array}$$
Thus $$ \sup_{a \in K^*}||(\langle a,T\rangle)^n f||_{k,2} \leq
||f||_{k,2}.$$
\hspace*{5mm} Conversely, we assume that the inequality
(\ref{l10}) is valid for all $n \in I\!\!N$. Since $T^\mu f \in L^2_{k}(I\!\!R^d)$ for all $\mu = (\mu_1,...,\mu_d) \in I\!\!N^d$, from Proposition 2.7, Theorem 2.3 and the inequality (\ref{l10}) we obtain for all
$n \in I\!\!N$:
\begin{equation}\label{l12}
\sup_{a \in K^*}||(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)||_{k,2} = \frac{2^{\gamma+\frac{d}{2}}}{c_k}\sup_{a
\in K^*}||(\langle a,T\rangle)^n f||_{k,2} \leq
\frac{2^{\gamma+\frac{d}{2}}}{c_k}||f||_{k,2}.
\end{equation}
Let $\xi_0 \notin K$; this means that there exists $a \in K^*$ such that $\langle\xi_0,a\rangle > 1$. Then there is a neighborhood
$U_{\xi_0}$ of $\xi_0$ with the property $\langle\xi,a\rangle >
\displaystyle\frac{1+ \langle\xi_0,a\rangle }{2} > 1$, for all $\xi \in U_{\xi_0}$. Thus for all $n \in I\!\!N$:
\begin{eqnarray}\label{l13}\nonumber
\frac{2^{\gamma+\frac{d}{2}}}{c_k}||f||_{k,2} &\geq& \sup_{a \in K^*}||(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)||_{k,2} \geq
(\displaystyle\int_{U_{\xi_0}}|(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi)^{\frac{1}{2}}\\ \\ &\geq&
(\displaystyle\frac{1+ \langle\xi_0,a\rangle }{2})^n(\displaystyle\int_{U_{\xi_0}}|{\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi)^{\frac{1}{2}}. \nonumber
\end{eqnarray}
Since $(\displaystyle\frac{1+ \langle\xi_0,a\rangle }{2})^n$ approaches
$\infty$ as $n \to \infty$, (\ref{l13}) holds only if
$$\displaystyle\int_{U_{\xi_0}}|{\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi = 0,$$
this implies that $\xi_0$ does not belong to the support of
${\cal F}_D(f)$. Hence $\mathrm{supp}\,{\cal F}_D(f) \subset K$, and Theorem 6.1 is proved.\vspace*{5mm}
\title{Hot Gas in Galaxy Clusters: Theory and Simulations}

\begin{abstract}
We review the theory of the formation of galaxy clusters and discuss their role as cosmological probes. We begin with the standard cosmological framework where we discuss the origin of the CDM matter power spectrum and the growth of density fluctuations in the linear regime. We then summarize the spherical top-hat model for the nonlinear growth of fluctuations from which scaling relations and halo statistics are derived.
Numerical methods for simulating gas in galaxy clusters are then overviewed with an emphasis on multiscale hydrodynamic simulations of cluster ensembles. Results of hydrodynamic AMR simulations are described which compare cluster internal and statistical properties as a function of their assumed baryonic processes. Finally, we compare various methods of measuring cluster masses using X-ray and the thermal Sunyaev-Zeldovich effect (SZE). We find that SZE offers great promise for precision measurements in raw samples of high-z clusters.
\end{abstract}

\section{Introduction}
The Sunyaev-Zeldovich Effect (SZE) detectable in galaxy clusters has emerged as a powerful new probe of the low to intermediate redshift universe (see articles by Birkinshaw {\&} Rephaeli in this volume, as well as the review by Carlstrom, et al. \cite{carlstrom02}). Within the prevailing theory of cosmological structure formation, galaxy clusters form in rare, massive peaks of the cosmic density field. Because of natural biasing, such regions get a ``head start'' on structure formation on all scales smaller than the cluster scale.
As a consequence, galaxy clusters at the present epoch contain the oldest objects in the universe in an evolutionary sense \cite{springel05}.
This makes galaxy clusters intrinsically interesting as astrophysical objects, worthy of study observationally, theoretically, and computationally.

However, much of the current interest stems from the potential use of galaxy clusters as cosmological probes. As discussed in more detail below, the space density of galaxy clusters as a function of cosmological redshift is sensitive to the RMS mass fluctuations on scales of 10$^{14-15} M_{\odot}$,
which depends on $\Omega _{m}$, the mean mass density of the universe, and to a lesser extent, $\Omega _{de}$, the dark energy density of the universe. Attempts to deduce $\Omega _{m}$ based on X-ray surveys have met with some success \cite{Rosati02}, but they have been hampered by the fact that at these wavebands cluster samples become sparse at z$>$1 owing to their low surface brightness. Because the SZE is intrinsically redshift independent, one has the possibility of detecting clusters over a wide range of redshifts. Blind surveys with sufficient sensitivity can in principle detect clusters from z=0 to their formation redshift $z \leq 1.5$ \cite{carlstrom02}, paving the way for more precise cosmological parameter measurements. Follow-up pointed observations of a large sample of galaxy clusters over a range of redshifts would enable a detailed study of their formation and evolution. Such studies would confirm or modify our theory of structure formation, improve our understanding of galaxy evolution, and reveal a great deal about the complex physical processes operating in the intracluster medium (ICM).

This paper summarizes four lectures the author delivered at the Varenna Summer School entitled ``Background Microwave Radiation and Intracluster Cosmology'',
held July 2004 in Varenna, Italy. Originally, the organizers asked me to deliver three lectures covering numerical simulations of galaxy clusters, as well as to review the basics of cosmological structure formation, of which galaxy clusters are just one aspect. The first lecture of the school was to have been given by Dr. Rocky Kolb on the cosmological standard model and the linear growth of density perturbations. When he was unable to attend the school, that responsibility fell to me, increasing my task to four lectures.
Fortunately, Dr. Kolb's lecture slides were made available to me, which I used verbatim. The following Section 2 follows closely the content and organization of Dr. Kolb's lecture notes, while Sections 3-5 are my own.
Section 3 reviews key concepts and results from structure formation theory that provide the vocabulary and framework for interpreting observations and simulations of galaxy clusters. Section 4 discusses the technical challenges associated with simulating gas in galaxy clusters and reviews the numerical methods we have employed. Section 5 presents results of numerical simulations of statistical ensembles of galaxy clusters whose goal is to understand how observables such as X-ray luminosity, emission-weighted temperature, and SZE depend on cluster mass and baryonic physics.

In line with the character of the summer school, I have attempted to be pedagogical, emphasizing the key concepts and results that a student needs to know if he/she wants to understand the current literature or do research in this area. Literature citations are kept to a minimum, except for textbooks, reviews, and research papers that I found to be particularly helpful in preparing this article.

\section {Cosmological framework and perturbation growth in the linear regime}

Our modern theory of the structure and evolution of the universe, along with the observational data which support it, is admirably presented in a recent textbook by Dodelson \cite{dodelson03}.
Remarkable observational progress has been made in the past two decades which has strengthened our confidence in the correctness of the hot, relativistic, expanding universe model (Big Bang), has measured the universe's present mass-energy contents and kinematics, and lent strong support to the notion of a very early, inflationary phase. Moreover,
observations of high redshift supernovae unexpectedly have revealed that the cosmic expansion is accelerating at the present time, implying the existence of a pervasive, dark energy field with negative pressure \cite{Perlmutter03}. This surprising discovery has enlivened observational efforts to accurately measure the cosmological parameters over as large a fraction of the age of the universe as possible, especially over the redshift interval 0
$<$ z $<$ 1.5 which, according to current estimates, spans the deceleration-acceleration transition. These efforts include large surveys of galaxy large scale structure, galaxy clusters, weak lensing, the Lyman alpha forest, and high redshift supernovae, all of which span the relevant redshift range. Except for the supernovae, all other techniques rely on measurements of cosmological structure in order to deduce cosmological parameters.

\subsection{Cosmological standard model}
The dynamics of the expanding universe is described by the two Friedmann equations derived from Einstein's theory of general relativity under the assumption of homogeneity and isotropy. The expansion rate at time $t$ is given by
\begin{equation}\label{eq1}
H^2(t)\equiv \left( {\frac{\dot {a}}{a}} \right)^2=\frac{8\pi G}{3}\sum\limits_i {\rho _i } -\frac{k}{a^2}+\frac{\Lambda }{3}
\end{equation}
where $H(t)$ is the Hubble parameter and $a(t)$ is the FRW scale factor at time
$t$. The first term on the RHS is proportional to the sum over all energy densities in the universe $\rho _{i }$ including baryons, photons, neutrinos, dark matter and dark energy. We have explicitly pulled the dark energy term out of the sum and placed it in the third term assuming it is a constant (the cosmological constant). The second term is the curvature term, where
$k=0,\pm 1$ for zero, positive, negative curvature, respectively. Equation (\ref{eq1})
can be cast in a form useful for numerical integration if we introduce
$\Omega $ parameters:
\begin{equation}\label{eq2}
\Omega _i \equiv \frac{8\pi G}{3H^2}\rho _i ,\mbox{ }\Omega _\Lambda \equiv
\frac{8\pi G}{3H^2}\rho _\Lambda =\frac{\Lambda }{3H^2},\mbox{ }\Omega_k
\equiv \frac{-k}{(aH)^2}
\end{equation}
Dividing equation (\ref{eq1}) by $H^2$ we get the sum rule 1=$\Omega _{m}+\Omega _{k}+\Omega _{\Lambda }$, which is true at all times,
where $\Omega _{m}$ is the sum over all $\Omega _{i}$ excluding dark energy. At the present time $H(t)=H_{0}, a=1$, and cosmological density parameters become
\begin{equation}\label{eq3}
\Omega _i (0)=\frac{8\pi G}{3H_0^2 }\rho _i (0),\mbox{ }\Omega _\Lambda
(0)=\frac{\Lambda }{3H_0^2 },\mbox{ }\Omega _k (0)=\frac{-k}{H_0^2 }
\end{equation}
Equation (\ref{eq1}) can then be manipulated into the form
\begin{equation}\label{eq4}
\dot {a}=H_0 [\Omega _m (0)(a^{-1}-1)+\Omega _\gamma (0)(a^{-2}-1)+\Omega _\Lambda (0)(a^2-1)+1]^{1/2}
\end{equation}
Here we have explicitly introduced a density parameter for the background radiation field $\Omega _{\gamma }$ and used the fact that matter and radiation densities scale as a$^{-3}$ and a$^{-4}$, respectively,
and we have used the sum rule to eliminate $\Omega _{k}$. Equation (\ref{eq4}) is equation (\ref{eq1}) expressed in terms of the \textit{current} values of the density and Hubble parameters, and makes explicit the scale factor dependence of the various contributions to the expansion rate. In particular, it is clear that the expansion rate is dominated first by radiation, then by matter, and finally by the cosmological constant.

Current measurements of the cosmological parameters by different techniques
\cite{spergel03} yield the following numbers [(0) notation suppressed]:
\[
\begin{array}{l}
h\equiv H_0 /(100km/s/Mpc)\approx 0.72 \\
\Omega _{total} \approx 1,\mbox{ }\Omega _\Lambda \approx 0.73\mbox{,
}\Omega _m =\Omega _{cdm} +\Omega _b \approx 0.27,\Omega _k \approx 0 \\
\Omega _b \approx 0.04,\mbox{ }\Omega _\nu \approx 0.005,\mbox{ }\Omega _\gamma \approx 0.00005 \\
\end{array}
\]
This set of parameters is referred to as the concordance model \cite{bops99}, and describes a spatially flat, low matter density, high dark energy density universe in which baryons, neutrinos, and photons make a negligible contribution to the large scale dynamics. Most of the matter in the universe is cold dark matter (CDM) whose dynamics is discussed below. As we will also see below, baryons and photons make an important contribution to shaping of the matter power spectrum despite their small contribution to the present-day energy budget. Understanding the evolution of baryons in nonlinear structure formation is essential to interpret X-ray and SZE observations of galaxy clusters.

The second Friedmann equation relates the second time derivative of the scale factor to the cosmic pressure $p $ and energy density\textit{ $\rho $}
\begin{equation}\label{eq5}
\frac{\ddot {a}}{a}=-\frac{4\pi G}{3}(\rho +3p),\mbox{ }\rho =\sum\limits_i
{\rho _i } =\rho _m +\rho _\gamma +\rho _\Lambda
\end{equation}
$p$ and $\rho $ are related by an equation of state $p_{i}=w_{i}\rho _{i}$, with $w_{m}$=0, $w_{\gamma }$=1/3, and $w_{\Lambda }= -1$. We thus have
\begin{equation}\label{eq6}
\frac{\ddot {a}}{a}=-\frac{4\pi G}{3}(\rho _m +2\rho _\gamma -2\rho _\Lambda
)\mbox{. }
\end{equation}
Expressed in terms of the current values for the cosmological parameters we have
\begin{equation}\label{eq7}
\frac{\ddot {a}}{a}=-\frac{1}{2}H_0^2 [\Omega _m (0)a^{-3}+2\Omega _\gamma
(0)a^{-4}-2\Omega _\Lambda (0)]\mbox{. }
\end{equation}
Evaluating equation \ref{eq7} using the concordance parameters, we see the universe is currently accelerating $\ddot {a}\approx 0.6H_0^2 \mbox{ }$ .
Assuming the dark energy density is a constant, the acceleration began when
\begin{equation}\label{eq8}
a\equiv \frac{1}{1+z}=\left( {\frac{\Omega _m (0)}{2\Omega _\Lambda (0)}}
\right)^{\mbox{1/3}}\mbox{ }\approx 0.57
\end{equation}
or $z\sim 0.75$.

\subsection{The Linear power spectrum}

Cosmic structure results from the amplification of primordial density fluctuations by gravitational instability. The power spectrum of matter density fluctuations has now been measured with considerable accuracy across roughly four decades in scale. Figure \ref{fig1} shows the latest results,
taken from reference
\cite{tegmark03}. Combined in this figure are measurements using cosmic microwave background (CMB) anisotropies, galaxy large scale structure, weak lensing of galaxy shapes, and the Lyman alpha forest, in order of decreasing comoving wavelength. In addition, there is a single data point for galaxy clusters, whose current space density measures the amplitude of the power spectrum on 8 h$^{-1}$ Mpc scales \cite{wef93}.
Superimposed on the data is the predicted $\Lambda $CDM linear power spectrum at z=0 for the concordance model parameters. As one can see, the fit is quite good. In actuality, the concordance model parameters are determined by fitting the data. A rather complex statistical machinery underlies the determination of cosmological parameters, and is discussed in Dodelson (2003, Ch. 11). The fact that modern CMB and LSS data agree over a substantial region of overlap gives us confidence in the correctness of the concordance model. In this section, we define the power spectrum mathematically, and review the basic physics which determines its shape.
Readers wishing a more in depth treatment are referred to references
\cite{dodelson03,kolbturner90}.

\begin{figure}[htbp]
\centerline{\includegraphics[width=4.15in,height=3.8in]{fig1small.eps}}
\caption{Linear matter power spectrum P(k) versus wavenumber extrapolated to z=0, from various measurements of cosmological structure. The best fit
$\Lambda $CDM model is shown as a solid line. From \cite{tegmark03}.}
\label{fig1}
\end{figure}

At any epoch $t$ (or $a$ or $z)$ express the matter density in the universe in terms of a mean density and a local fluctuation:
\begin{equation}\label{eq9}
\rho (\vec {x})=\bar {\rho }(1+\delta (\vec {x}))
\end{equation}
where $\delta (\vec {x})$ is the density contrast. Expand $\delta (\vec
{x})$ in Fourier modes:
\begin{equation}\label{eq10}
\delta (\vec {x})\equiv \frac{\rho (\vec {x})-\bar {\rho }}{\bar {\rho
}}=\int {\delta (\vec {k})\exp (-i\vec {k}\cdot \vec {x})d^3} k.
\end{equation}
The autocorrelation function of $\delta (\vec {x})$ defines the power spectrum through the relations
\begin{equation}\label{eq11}
\left\langle {\delta (\vec {x})\delta (\vec {x})} \right\rangle
=\int\limits_0^\infty {\frac{dk}{k}} \frac{k^3\left| {\delta ^2(\vec {k})}
\right|}{2\pi ^2}=\int\limits_0^\infty {\frac{dk}{k}} \frac{k^3P(k)}{2\pi
^2}=\int\limits_0^\infty {\frac{dk}{k}} \Delta ^2(k)
\end{equation}
where we have the definitions
\begin{equation}\label{eq12}
P(k)\equiv \left| {\delta ^2(\vec {k})} \right|,\mbox{ and }\Delta
^2(k)\equiv \frac{k^3P(k)}{2\pi ^2}.
\end{equation}
The quantity $\Delta ^2(k)$ is called the dimensionless power spectrum and is an important function in the theory of structure formation. $\Delta
^2(k)$ measures the contribution of perturbations per unit logarithmic interval at wavenumber $k$ to the variance in the matter density fluctuations.
The $\Lambda $CDM power spectrum asymptotes to $P(k)\sim k^{1}$ for small
$k$, and $P(k)\sim k^{-3}$ for large $k$, with a peak at $k^{\star}\sim 2\times 10^{-2}$ h Mpc$^{-1}$ corresponding to $\lambda^{\star}\sim $350 h$^{-1}$ Mpc.
$\Delta ^2(k)$ is thus asymptotically flat at high $k$, but drops off as
$k^{4}$ at small $k$. We therefore see that most of the variance in the cosmic density field in the universe at the present epoch is on scales $\lambda < \lambda^{\star}.$

\begin{figure}[htbp]
\centerline{\includegraphics[width=4in,height=3in]{fig2.eps}}
\caption{The tale of two fluctuations. A fluctuation which is superhorizon scale at matter-radiation equality grows always, while a fluctuation which enters the horizon during the radiation dominated era stops growing in amplitude until the matter dominated era begins.}
\label{fig2}
\end{figure}

What is the origin of the power spectrum shape? Here we review the basic ideas.
Within the inflationary paradigm, it is believed that quantum mechanical
(QM) fluctuations in the very early universe were stretched to macroscopic scales by the large expansion factor the universe underwent during inflation. Since QM fluctuations are random, the primordial density perturbations should be well described as a Gaussian random field.
Measurements of the Gaussianity of the CMB anisotropies \cite{komatsu03} have confirmed this. The primordial power spectrum is parameterized as a power law $P_p
(k)\propto k^n$, with $n=1$ corresponding to scale-invariant spectrum proposed by Harrison and Zeldovich on the grounds that any other value would imply a preferred mass scale for fluctuations entering the Hubble horizon.
Large angular scale CMB anisotropies measure the primordial power spectrum directly since they are superhorizon scale. Observations with the WMAP satellite are consistent with $n=1$.

To understand the origin of the spectrum, we need to understand how the amplitude of a fluctuation of fixed comoving wavelength $\lambda$ grows with time. Regardless of its wavelength, the fluctuation will pass through the Hubble horizon as illustrated in Fig. \ref{fig2}. This is because the Hubble radius grows linearly with time, while the proper wavelength a$\lambda $
grows more slowly with time. It is easy to show from Eq. \ref{eq1} that in the radiation-dominated era, $a\sim t^{1/2}$, and in the matter-dominated era
(prior to the onset of cosmic acceleration) $a\sim t^{2/3}$. Thus, inevitably,
a fluctuation will transition from superhorizon to subhorizon scale. We are interested in how the amplitude of the fluctuation evolves during these two phases. Here we merely state the results of perturbation theory (e.g.,
Dodelson 2003, Ch. 7).

\begin{figure}[htbp]
\begin{tabular}{c}
\centerline{\includegraphics[width=4in,height=2in]{fig3a.eps}} \\
\centerline{\includegraphics[width=4.4in,height=2.2in]{fig3.eps}}
\end{tabular}
\caption{ a) Evolution of the primordial power spectrum on superhorizon scales during the radiation dominated era. b) Scale-free spectrum produces a constant contribution to the density variance per logarithmic wavenumber interval entering the Hubble horizon (no preferred scale) c) resulting matter power spectrum, super- and sub-horizon. Figures courtesy Rocky Kolb.}
\label{fig3}
\end{figure}

\subsection{Growth of fluctuations in the linear regime }

To calculate the growth of superhorizon scale fluctuations requires general relativistic perturbation theory, while subhorizon scale perturbations can be analyzed using a Newtonian Jeans analysis. We are interested in scalar density perturbations, because these couple to the stress tensor of the matter-radiation field. Vector perturbations (e.g., fluid turbulence)
are not sourced by the stress-tensor, and decay rapidly due to cosmic expansion. Tensor perturbations are gravity waves, and also do not couple to the stress-tensor. A detailed analysis for the scalar perturbations yields the following results. In the \underline {radiation dominated era},
\[
\begin{array}{l}
\delta _+ (t)=\delta _+ (t_i )(t/t_i )\mbox{ superhorizon scales} \\
\delta _+ (t)=constant \mbox{ ~~~subhorizon scales} \\
\end{array}
\]
while in the \underline {matter dominated era},
\[
\begin{array}{l}
\delta _+ (t)=\delta _+ (t_i )(t/t_i )^{2/3}\mbox{ superhorizon scales} \\
\delta _+ (t)=\delta _+ (t_i )(t/t_i )^{2/3}\mbox{ subhorizon scales} \\
\end{array}
\]
This is summarized in Fig. \ref{fig2}, where we consider two fluctuations of different comoving wavelengths, which we will call large and small. The large wavelength perturbation remains superhorizon through matter-radiation equality (MRE), and enters the horizon in the matter dominated era. Its amplitude will grow as $t$ in the radiation dominated era, and as $t^{2/3}$ in the matter dominated era. It will continue to grow as $t^{2/3}$ after it becomes subhorizon scale. The small wavelength perturbation becomes subhorizon before MRE. Its amplitude will grow as $t$ while it is superhorizon scale, remain constant while it is subhorizon during the radiation dominated era, and then grow as $t^{2/3}$ during the matter-dominated era.

Armed with these results, we can understand what is meant by a scale-free primordial power spectrum (the Harrison-Zeldovich power spectrum.) We are concerned with perturbation growth in the very early universe during the radiation dominated era. Superhorizon scale perturbation amplitudes grow as
$t$, and then cease to grow after they have passed through the Hubble horizon.
We can define a Hubble wave number $k_H \equiv 2\pi /R_H \propto t^{-1}.$ Fig. 3a shows the primordial power spectrum at three instants in time for k$<$k$_{H}$. We see that the fluctuation amplitude at k=k$_{H}$(t)
depends on primordial power spectrum slope n. The scale-free spectrum is the value of n such that $\Delta ^2(k_{H}(t))$=constant for k$>$k$_{H}$. A simple analysis shows that this implies n=1. Since $\Delta ^2(k)\propto k^3P(k)$, we then have
\[
\begin{array}{l}
P(k)\propto k^1,\mbox{ }k\le k_H \\
P(k)\propto k^{-3},\mbox{ }k>k_H \\
\end{array}
\]

In actuality, the power spectrum has a smooth maximum, rather than a peak as shown in Fig. 3c. This smoothing is caused by the different rates of growth before and after matter-radiation equality.
The transition from radiation to matter-dominated is not instantaneous. Rather, the expansion rate of the universe changes smoothly through equality, as given by Eq. \ref{eq1}, and consequently so do the temporal growth rates. The position of the peak of the power spectrum is sensitive to when the universe reached matter-radiation equality, and hence is a probe of $\Omega _\gamma /\Omega _m $.

Once a fluctuation becomes sub-horizon, dissipative processes modify the shape of the power spectrum in a scale-dependent way. Collisionless matter will freely stream out of overdense regions and smooth out the inhomogeneities. The faster the particle, the larger its free streaming length. Particles which are relativistic at MRE, such as light neutrinos,
are called hot dark matter (HDM). They have a large free-streaming length,
and consequently damp the power spectrum over a large range of k. Weakly Interacting Massive Particles (WIMPs) which are nonrelativistic at MRE, are called cold dark matter (CDM), and modify the power spectrum very little
(Fig. \ref{fig4}).
Baryons are tightly coupled to the radiation field by electron scattering prior to recombination. During recombination, the photon mean-free path becomes large. As photons stream out of dense regions, they drag baryons along, erasing density fluctuations on small scales. This process is called Silk damping, and results in damped oscillations of the baryon-photon fluid once they become subhorizon scale. The magnitude of this effect is sensitive to the ratio of baryons to collisionless matter, as shown in Fig.
\ref{fig4}.

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=1.7in]{fig4.eps}
\includegraphics[width=2.5in,height=1.7in]{fig5.eps}
\caption{Effect of dissipative processes on the evolved power spectrum. Left: Effect of collisionless damping (free streaming) in the dark matter. Right: Effect of collisional damping (Silk damping) in the matter-radiation fluid. Figures courtesy Rocky Kolb.}
\label{fig4}
\end{figure}

\section {Analytic models for nonlinear growth, virial scaling \\
relations, and halo statistics}

Here we introduce a few concepts and analytic results from the theory of structure formation which underlie the use of galaxy clusters as cosmological probes. These provide us with the vocabulary which pervades the literature on analytic and numerical models of galaxy cluster evolution. Material in this section has been derived from three primary sources: Padmanabhan (1993)
\cite{pad93} for the spherical top-hat model for nonlinear collapse, Dodelson (2003)
\cite{dodelson03} for Press-Schechter theory, and Bryan {\&} Norman (1998) \cite{BN98}
for virial scaling relations.

\subsection{Nonlinearity defined}

In the linear regime, both super- and sub-horizon scale perturbations grow as $t^{2/3}$ in the matter-dominated era. This means that after recombination,
the linear power spectrum retains its shape while its amplitude grows as
$t^{4/3}$ before the onset of cosmic acceleration. When $\Delta ^2(k)$ for a given k approaches unity linear theory no longer applies, and some other method must be used to determine the fluctuation's growth. In general,
numerical simulations are required to model the nonlinear phase of growth because in the nonlinear regime, the modes do not grow independently.
Mode-mode coupling modifies both the shape and amplitude of the power spectrum over the range of wavenumbers that have gone nonlinear.

At any given time, there is a critical wavenumber which we shall call the nonlinear wavenumber k$_{nl}$ which determines which portion of the spectrum has evolved into the nonlinear regime. Modes with k$<$k$_{nl}$ are said to be linear, while those for which k$>$ k$_{nl}$ are nonlinear.
Conventionally, one defines the nonlinear wavenumber such that $\Delta
(k_{nl} ,z)=1.$ From this one can derive a nonlinear mass scale $M_{nl}
(z)=\frac{4\pi }{3}\bar {\rho }(z)\left( {\frac{2\pi }{k_{nl} }} \right)^3$.
A more useful and rigorous definition of the nonlinear mass scale comes from evaluating the amplitude of mass fluctuations within spheres of radius R at epoch z. The enclosed mass is $M=\frac{4\pi }{3}\bar {\rho }(z)R^3.$ The mean square mass fluctuations (variance) is
\begin{equation}\label{eq17}
\left\langle {(\delta M/M)^2} \right\rangle \equiv \sigma ^2(M)=\int
{d^3kW_T^2 (kR)P(k,z),}
\end{equation}
where $W_T$ is the Fourier transform of the top-hat window function
\begin{equation}\label{eq18}
\begin{array}{l}
\mbox{W(}{\rm {\bf x}}\mbox{)}=\left\{ {{\begin{array}{*{20}c}
{3/4\pi R^3,\mbox{ }\left| {\rm {\bf x}} \right|<R} \hfill \\
{0,\mbox{ }\left| {\rm {\bf x}} \right|\ge R} \hfill \\
\end{array} }} \right. \\
\to W_T (kR)=3\left[ {\sin (kR)/kR-\cos (kR)} \right]/(kR)^2. \\
\end{array}
\end{equation}
If we approximate P(k) locally with a power-law $P(k,z)=D^2(z)k^m$, where D is the linear growth factor, then $\sigma ^2(M)\propto D^2R^{-(3+m)}\propto D^2M^{-(3+m)/3}.$ From this we see that the RMS fluctuations are a decreasing function of M. At very small mass scales, m$\rightarrow -3$, and the fluctuations asymptote to a constant value. We now define the nonlinear mass scale by setting $\sigma $(M$_{nl})$=1. We get that (\cite{white94})
\begin{equation}\label{eq19}
M_{nl} (z)\propto D(z)^{6/(3+m)}\mbox{ (}\propto
\mbox{(1}+\mbox{z)}^{\mbox{-6/(3}+\mbox{m)}}\mbox{ for EdS).}
\end{equation}
For $m > -3$, the smallest mass scales become nonlinear first. This is the origin of hierarchical (``bottom-up'') structure formation.

\subsection{Spherical Top-Hat Model}

\begin{figure}[htbp]
\centerline{\includegraphics[width=3in,height=2in]{fig6.eps}}
\caption{Evolution of a top-hat perturbation in an EdS universe. Depending on E, the first integral of motion, the fluctuation collapses (E$<$0),
continues to expand (E$>$0), or asymptotically reaches its maximum radius
(E=0). Virialization occurs when the fluctuation has collapsed to half its turnaround radius.}
\label{fig5}
\end{figure}

We now ask what happens when a spherical volume of mass M and radius R exceeds the nonlinear mass scale. The simplest analytic model of the nonlinear evolution of a discrete perturbation is called the spherical top-hat model. In it, one imagines a spherical perturbation of radius $R$
and some constant overdensity $\bar {\delta }$, so that its mean density is $\bar {\rho }(1+\bar {\delta })=3M/4\pi R^3$, in an Einstein-de Sitter
(EdS) universe. By Birkhoff's theorem the equation of motion for R is
\begin{equation}\label{eq20}
\frac{d^2R}{dt^2}=-\frac{GM}{R^2}=-\frac{4\pi G}{3}\bar {\rho }(1+\bar
{\delta })R
\end{equation}
whereas the background universe expands according to Eq. \ref{eq6}
\begin{equation}\label{eq21}
\frac{d^2a}{dt^2}=-\frac{4\pi G}{3}\bar {\rho }a.
\end{equation}

Comparing these two equations, we see that the perturbation evolves like a universe of a different mean density, but with the same initial expansion rate. Integrating Eq. \ref{eq20} once with respect to time gives us the first integral of motion:
\begin{equation}\label{eq22}
\frac{1}{2}\left( {\frac{dR}{dt}} \right)^2-\frac{GM}{R}=E,
\end{equation}
where E is the total energy of the perturbation. If E$<$0, the perturbation is bound, and obeys
\begin{equation}\label{eq23}
\frac{R}{R_m}=\frac{(1-\cos \theta)}{2}, ~~~\frac{t}{t_m}=\frac{(\theta-\sin\theta)}{\pi}
\end{equation}
where $R_m$ and $t_m$ are the radius and time of ``turnaround''. At turnaround
(as $\theta \rightarrow \pi$), the fluctuation reaches its maximum proper radius (see Fig. \ref{fig5}). As
$t\rightarrow 2t_m, R\rightarrow 0$, and we say the fluctuation has collapsed.

A detailed analysis of the evolution of the top-hat perturbation is given in Padmanabhan (1993, Ch. 8) for general $\Omega_m$.
Here we merely quote results for an EdS universe.
The mean \textit{linear} overdensity at turnaround; i.e., the value one would predict from the linear growth formula $\delta \sim t^{2/3}$, is 1.063. The actual overdensity at turnaround using the nonlinear model is 4.6. This illustrates that nonlinear effects set in well before the amplitude of a linear fluctuation reaches unity. As R$\rightarrow $0, the nonlinear overdensity becomes infinite.
However, the linear overdensity at $t=2t_m$ is only 1.686. As the fluctuation collapses, other physical processes (pressure, shocks, violent relaxation)
become important which establish a gravitationally bound object in virial equilibrium before infinite density is reached. Within the framework of the spherical top-hat model, we say virialization has occurred when the kinetic and gravitational energies satisfy virial equilibrium: $\left| U \right|=2K.$ It is easy to show from conservation of energy that this occurs when $R=R_m/2$; in other words, when the fluctuation has collapsed to half its turnaround radius. The nonlinear overdensity at virialization $\Delta _c$
is not infinite since the radius is finite.
For an EdS universe, $\Delta _c =18\pi ^2\approx 180$. Fitting formulae for non-EdS models are provided in the next section.

\subsection{Virial Scaling Relations}
The spherical top-hat model can be scaled to perturbations of arbitrary mass. Using virial equilibrium arguments, we can predict various physical properties of the virialized object. The ones that interest us most are those that relate to the observable properties of gas in galaxy clusters,
such as temperature, X-ray luminosity, and SZ intensity change. Kaiser \cite{kaiser86}
first derived virial scaling relations for clusters in an EdS universe. Here we generalize the derivation to non-EdS models of interest. In order to compute these scaling laws, we must assume some model for the distribution of matter as a function of radius within the virialized object. A top-hat distribution with a density $\rho =\Delta _c \bar {\rho }(z)$ is not useful because it is not in mechanical equilibrium. More appropriate is the isothermal,
self-gravitating, equilibrium sphere for the collisionless matter, whose density profile is related to the one-dimensional velocity dispersion
\cite{bt87}
\begin{equation}\label{eq24}
\rho (r)=\frac{\sigma ^2}{2\pi Gr^2}.
\end{equation}
If we define the virial radius r$_{vir}$ to be the radius of a spherical volume within which the mean density is $\Delta _{c}$ times the critical density at that redshift ($M=4\pi r_{vir}^3 \rho _{crit} \Delta _c /3)$, then there is a relation between the virial mass M and $\sigma $:
\begin{equation}
\label{eq25}
\sigma =M^{1/3}[H^2(z)\Delta _c G^2/16]^{1/6}\approx 476f_\sigma \left(
{\frac{M}{10^{15}M_\odot }} \right)^{1/3}(h^2\Delta _c E^2)^{1/6}\mbox{ km s}^{\mbox{-1}}.
\end{equation}
Here we have introduced a normalization factor $f_{\sigma}$ which will be used to match the normalization from simulations. The redshift dependent Hubble parameter can be written as $H(z)=100hE(z)\mbox{ km s}^{-1}\mbox{ Mpc}^{-1}$ with the function $E^2(z)=\Omega _m (1+z)^3+\Omega _k (1+z)^2+\Omega _\Lambda $,
where the $\Omega$'s have been previously defined.

The value of $\Delta_c$ is taken from the spherical top-hat model, and is 18$\pi
^{2}$ for the critical EdS model, but has a dependence on cosmology through the parameter $\Omega (z)=\Omega _m (1+z)^3/E^2(z).$ Bryan and Norman
(1998) provided fitting formulae for $\Delta_c$ for both open universe models and flat, lambda-dominated models
\begin{equation}\label{eq26}
\Delta _c =18\pi ^2+82x-39x^2\mbox{ for }\Omega _k =0,\mbox{ }\Delta _c
=18\pi ^2+60x-32x^2\mbox{ for }\Omega _\Lambda =0
\end{equation}
where $x=\Omega (z)-1$.

If the distribution of the baryonic gas is also isothermal, we can define a ratio of the ``temperature'' of the collisionless material ($T_\sigma =\mu m_p \sigma ^2/k)$ to the gas temperature:
\begin{equation}
\label{eq27}
\beta =\frac{\mu m_p \sigma ^2}{kT}
\end{equation}
Given equations (\ref{eq25}) and (\ref{eq27}), the relation between temperature and mass is then
\begin{equation}
\label{eq28}
kT=\frac{GM^{2/3}\mu m_p }{2\beta }\left[ {\frac{H^2(z)\Delta _c }{2G}}
\right]^{1/3}\approx 1.39f_T \left( {\frac{M}{10^{15}M_\odot }}
\right)^{2/3}(h^2\Delta _c E^2)^{1/3}\mbox{ keV,}
\end{equation}
where in the last expression we have added the normalization factor f$_{T}$
and set $\beta $=1.

The scaling behavior for the object's X-ray luminosity is easily computed by assuming bolometric bremsstrahlung emission and ignoring the temperature dependence of the Gaunt factor: $L_{bol} \propto
\int {\rho ^2} T^{1/2}dV\propto M_b \rho T^{1/2},$ where $M_b$ is the baryonic mass of the cluster. This is infinite for an isothermal density distribution, since $\rho $ is singular. Observationally and computationally, it is found that the baryon distribution rolls over to a constant density core at small radius. A procedure is described in Bryan and Norman (1998) which yields a finite luminosity:
\begin{equation}
\label{eq29}
L_{bol} =1.3\times 10^{45}\left( {\frac{M}{10^{15}M_\odot }}
\right)^{4/3}(h^2\Delta _c E^2)^{7/6}\mbox{ }\left( {\frac{\Omega _b
}{\Omega _m }} \right)^2\mbox{ erg s}^{-1}.
\end{equation}
Eliminating M in favor of T in Eq. \ref{eq29} we get
\begin{equation}
\label{eq30}
L_{bol} =6.8\times 10^{44}\left( {\frac{kT/f_T }{1.0\mbox{ keV}}}
\right)^2(h^2\Delta _c E^2)^{1/2}\mbox{ }\left( {\frac{\Omega _b }{\Omega _m
}} \right)^2\mbox{ erg s}^{-1}.
\end{equation}
The scaling of the SZ ``luminosity'' is likewise easily computed. If we define L$_{SZ}$ as the integrated SZ intensity change: $L_{SZ} =\int {dA\int {n_e
\sigma _T } } \left( {\frac{kT}{m_e c^2}} \right)dl\propto M_b T$, then
\begin{equation}\label{eq30a}
L_{SZ} =\frac{GM^{5/3}\sigma _T }{2\beta m_e c^2}\left[ {\frac{H^2(z)\Delta _c }{2G}} \right]^{1/3}\left( {\frac{\Omega _b }{\Omega _m }} \right).
\end{equation}
We note that cosmology enters these relations only with the combination of parameters $h^2\Delta _c E^2$, which comes from the relation between the cluster's mass and the mean density of the universe at redshift z. The redshift variation comes mostly from E(z), which is equal to (1+z)$^{3/2}$
for an EdS universe.

\subsection{Statistics of hierarchical clustering: Press-Schechter theory}
Now that we have a simple model for the nonlinear evolution of a spherical density fluctuation and its observable properties as a function of its virial mass, we would like to estimate the number of virialized objects of mass M as a function of redshift given the matter power spectrum. This is the key to using surveys of galaxy clusters as cosmological probes. While large scale numerical simulations can and have been used for this purpose
(see below), we review a powerful analytic approach by Press and Schechter
\cite{ps74} which turns out to be remarkably close to numerical results. The basic idea is to imagine smoothing the cosmological density field at any epoch z on a scale R such that the mass scale of virialized objects of interest satisfies $M=\frac{4\pi }{3}\bar {\rho }(z)R^3.$ Because the density field
(both smoothed and unsmoothed) is a Gaussian random field, the probability that the mean overdensity in spheres of radius R exceeds a critical overdensity $\delta _{c}$ is
\begin{equation}\label{eq31}
p(R,z)=\frac{2}{\sqrt {2\pi } \sigma (R,z)}\int\limits_{\delta _c }^\infty
{d\delta } \exp \left( {-\frac{\delta ^2}{2\sigma ^2(R,z)}} \right)
\end{equation}
where $\sigma(R,z)$ is the RMS density variation in spheres of radius R as discussed above.
Press and Schechter suggested that this probability be identified with the fraction of particles which are part of a nonlinear lump with mass exceeding M if we take $\delta _c =1.686,$ the linear overdensity at virialization.
This assumption has been tested against numerical simulations and found to be quite good \cite{wef93}. The fraction of the volume collapsed into objects with mass between $M$ and $M+dM$ is given by
$(dp/dM)dM$. Multiply this by the average number density of such objects
$\bar {\rho }/M$ to get the number density of collapsed objects between
$M$ and $M+dM$:
\begin{equation}\label{eq32}
dn(M,z)=-\frac{\bar {\rho }}{M}\frac{dp(M(R),z)}{dM}dM.
\end{equation}
The minus sign appears here because p is a decreasing function of M.
Carrying out the derivative using the fact that $dM/dR=3M/R,$ we get
\begin{equation}\label{eq33}
\frac{dn(M,z)}{dM}=\sqrt {\frac{2}{\pi }} \frac{\bar {\rho }\delta _c
}{3M^2\sigma }e^{-\delta _c^2 /2\sigma ^2}\left[ {-\frac{d\ln \sigma }{d\ln R}} \right].
\end{equation}
The term in square brackets is related to the logarithmic slope of the power spectrum, which on the mass scale of galaxy clusters is close to unity. Eq.
\ref{eq33} is called the \textit{halo mass function}, and it has the form of a power law multiplied by an exponential. To make this more explicit, approximate the power spectrum on scales of interest as a power law as we have done above. Substituting the scaling relations for $\sigma $ in Eq. \ref{eq33} one gets the result \cite{white94}
\begin{equation}\label{eq34}
\frac{dn}{dM}=\left( {\frac{2}{\pi }} \right)^{1/2}\frac{\bar {\rho
}}{M^2}\left( {1+\frac{m}{3}} \right)\left[ {\frac{M}{M_{nl} (z)}}
\right]^{\frac{m-3}{6}}\exp \left[ {-\left( {\frac{M}{M_{nl} (z)}}
\right)^{\frac{3+m}{3}}/2} \right].
\end{equation}
Here, $M_{nl} (z)$ is the nonlinear mass scale. To be more consistent with the spherical top-hat model, it satisfies the relation $\sigma (M_{nl}
,z)=\delta _c $; i.e., those fluctuations in the smoothed density field that have reached the linear overdensity for which the spherical top-hat model predicts virialization.

\subsection{Application to galaxy clusters}

\begin{figure}[htbp]
\includegraphics[width=5in,height=3.33in]{fig7.eps}
\caption{Top left to bottom right: a) Integrated cluster mass function for three cosmologies and two redshifts; b) like a), but for integrated temperature function; c) like a) but for integrated SZ cross section; d)
redshift distribution of the integrated probability to find a cluster exceeding $M=3.5 \times 10^{14} h^{-1} M_{\odot}$; e) redshift distribution of the integrated probability to find a cluster exceeding kT=5 keV; f) redshift distribution of the integrated probability to find a cluster exceeding Y=$10^{-3}$ h arcmin$^{2}$. From \cite{ecf96}.}
\label{fig6}
\end{figure}

Galaxy clusters correspond to rare ($\sim 3\sigma$) peaks in the density field. Combining the halo mass function as predicted by the PS formalism with the scaling laws derived above, we can predict the evolution of the statistical properties of X-ray and SZ clusters of galaxies. Here we show a few results taken from Eke, Cole {\&} Frenk
(1996) \cite{ecf96}.
Fig.~\ref{fig6}a shows the evolution of the integrated mass function $n(>M)$
for several cosmologies and redshifts. One can see the power-law behavior at lower mass and the exponential cutoff at higher M. One sees strong redshift evolution of the number of massive clusters in the EdS model, but slower evolution in the open and lambda models. This is because of the saturated growth of structure in low density models. This makes number counts of massive clusters a sensitive test of the linear growth factor D(z), which depends on $\Omega_m$ and $\Omega_{\Lambda}$.
Convolving the cluster population with the scaling relations for T(M) and Y(M), one gets distribution functions for n($>$T) and n($>$Y). Here $Y=L_{SZ}/d_A^2$ is the effective SZE cross section of a cluster, where $d_A$ is its angular diameter distance.
These are shown in Figs.~\ref{fig6}b and \ref{fig6}c. Another way to present the data is to convolve the mass function with the differential volume element as a function of redshift for the three models. Figs.~\ref{fig6}d--f plot the redshift probability of detecting a cluster with M, T, and Y exceeding the fiducial values given in the figure caption. As one can see,
the profiles are sharply peaked at low redshift for the EdS model, but substantially broader and peaking at higher redshift for the low density universe models. There is, however, rather little difference between the open and lambda-dominated models as far as the probability distributions for M and Y. Things are somewhat better for T, implying that some combination of X-ray and SZE measurements will be needed for precision cosmological parameter determinations.

\section{Numerical simulations of gas in galaxy clusters}

The central task is, for a given cosmological model, to calculate the formation and evolution of a population of clusters from which synthetic X-ray and SZ catalogs can be derived. These can be used to calibrate simpler analytic models, as well as to build synthetic surveys (mock catalogs) which can be used to assess instrumental effects and survey biases. One would like to directly simulate $n(M,z), n(L_x,z), n(T,z), n(Y,z)$ from the governing equations for collisionless and collisional matter in an expanding universe.
Clearly, the quality of these statistical predictions relies on the ability to adequately resolve the internal structure and thermodynamical evolution of the ICM.

In Norman (2003) \cite{norman03}
I provided a historical review of the progress that has been made in simulating the evolution of gas in galaxy clusters motivated by X-ray observations. Since X-ray emission and the SZE are both consequences of hot plasma bound in the cluster's gravitational potential well, the requirements to faithfully simulate X-ray clusters and SZ clusters are essentially the same. Numerical progress can be characterized as a quest for higher resolution and essential baryonic physics. In this section I describe the technical challenges involved and the numerical methods that have been developed to overcome them. I then discuss the effects of assumed baryonic physics on ICM structure. Our point of reference is the non-radiative (so-called adiabatic) case, which has been the subject of an extensive code comparison
\cite{Frenk99}. I review the properties of adiabatic X-ray clusters,
and show that they fail to reproduce observed cluster scaling laws. I then show results of numerical hydrodynamic simulations incorporating radiative cooling, star formation, and galaxy feedback and their associated scaling properties.

\subsection{Dynamic range considerations}

\begin{figure}[htbp]
\includegraphics[width=3in,height=1.7in]{fig8a.eps}
\includegraphics[width=2.3in,height=1.5in]{fig8b.eps}
\caption{Left: A range of length scales of $\sim $250 separates the size of a reasonable survey volume and the virial radius of a rich cluster.
Right: Simplified structure of the ICM in a massive cluster. A range of length scales of $\sim $20-30 separates the virial radius and the core radius. }
\label{fig7}
\end{figure}

Figure~\ref{fig7} illustrates the dynamic range difficulties encountered with simulating a statistical ensemble of galaxy clusters, while at the same time resolving their internal structure. Massive clusters are rare at any redshift, yet these are the ones that are most sensitive to cosmology.
From the cluster mass function (Fig.~\ref{fig6}a), in order to get adequate statistics, one deduces that one must simulate a survey volume many hundreds of megaparsecs on a side (Fig.~\ref{fig7}a). A massive cluster has a virial radius of
$\sim $2 Mpc. It forms via the collapse of material within a comoving Lagrangian volume of $\sim $15 Mpc. However, tidal effects from a larger region (50-100 Mpc) are important to the dynamics of cluster formation. The internal structure of a cluster's ICM is shown in Fig.~\ref{fig7}b. While clusters are not spherical, two important radii are generally used to characterize them:
the virial radius, which is the approximate location of the virialization shock wave that thermalizes infalling gas to 10-100 million K, and the core radius, within which the baryon densities plateau and the highest X-ray emissions and SZ intensity changes are measured. A typical core radius is $\sim $200 kpc. Within the core, radiative cooling and possibly other physical processes are important. Outside the core, cooling times are longer than the Hubble time, and the ICM gas is effectively adiabatic. If we wanted to achieve a spatial resolution of 1/10 of a core radius everywhere within the survey volume, we would need a spatial dynamic range of D=500 Mpc/20 kpc = 25,000.
The mass dynamic range is more severe. If we want 1 million dark matter particles within the virial radius of a $10^{15} M_{\odot}$ cluster, then we would need $N_{particle} =M_{box} /M_{particle} =\Omega _m \rho _{crit}
L^3/10^9\approx 10^{11}$ if they were uniformly distributed in the survey volume.

Two solutions to the spatial dynamic range problem have been developed: tree codes for gridless N-body methods \cite{KWH96,syw01}
and adaptive mesh refinement (AMR) for Eulerian particle-mesh/hydrodynamic methods \cite{bn97,Kravtsov97,Teyssier02,OShea04}.
Both methods increase the spatial resolution automatically in collapsing regions as described below. The solution to the mass dynamic range problem is the use of multi-mass initial conditions in which a hierarchy of particle masses is used, with many low mass particles concentrated in the region of interest. This approach has most recently been used by Springel et al. (2000)
\cite{springel00},
who simulated the formation of a galaxy cluster dark matter halo with
$N=6.9\times 10^6$ dark matter particles, resolving the dark matter halos down to the mass scale of the Fornax dwarf spheroidal galaxy. The spatial dynamic range achieved in this simulation was $R=2\times 10^5$. Such dynamic ranges have not yet been achieved in galaxy cluster simulations with gas.

\subsection{Simulating cluster formation}

Simulations of cosmological structure formation are done in a cubic domain which is comoving with the expanding universe. Matter density and velocity fluctuations are initialized at the starting redshift chosen such that all modes in the volume are still in the linear regime.
Once initialized, these fluctuations are then evolved to z=0 by solving the equations for collisionless N-body dynamics for cold dark matter, and the equations of ideal gas dynamics for the baryons in an expanding universe. Making the transformation from proper to comoving coordinates $\vec {r}=a(t)\vec {x}$, Newton's laws for the collisionless dark matter particles become
\begin{equation}
\label{eq35}
\frac{d\vec {x}_{dm} }{dt}=\vec {\upsilon }_{dm} ,\mbox{ }\frac{d\vec
{\upsilon }_{dm} }{dt}=-2\frac{\dot {a}}{a}\vec {\upsilon }_{dm}
-\frac{1}{a^2}\nabla _x \phi
\end{equation}
where $\vec {x}_{dm}$ and $\vec {\upsilon }_{dm}$ are the particle's comoving position and peculiar velocity,
respectively, and $\phi$ is the comoving gravitational potential that includes baryonic and dark matter contributions. The hydrodynamical equations for mass, momentum, and energy conservation in an expanding universe in comoving coordinates are (\cite{Anninos97})
\begin{equation}
\label{eq36}
\begin{array}{l}
\frac{\partial \rho _b }{\partial t}+\nabla \cdot (\rho _b \vec {\upsilon
}_b )+3\frac{\dot {a}}{a}\rho _b =0, \\
\frac{\partial (\rho _b \upsilon _{b,i} )}{\partial t}+\nabla \cdot [(\rho _b \upsilon _{b,i} )\vec {\upsilon }_b ]+5\frac{\dot {a}}{a}\rho _b \upsilon _{b,i} =-\frac{1}{a^2}\frac{\partial p}{\partial x_i }-\frac{\rho _b
}{a^2}\frac{\partial \phi }{\partial x_i }, \\
\frac{\partial e}{\partial t}+\nabla \cdot (e\vec {\upsilon }_b )+p\nabla
\cdot \vec {\upsilon }_b +3\frac{\dot {a}}{a}e=\Gamma -\Lambda , \\
\end{array}
\end{equation}
where $\rho_b, p$ and $e$, are the baryonic density, pressure and internal energy density defined in the proper reference frame, $\vec {\upsilon }_b $ is the comoving peculiar baryonic velocity, $a=1/(1+z)$ is the cosmological scale factor, and $\Gamma $ and $\Lambda $ are the microphysical heating and cooling rates. The baryonic and dark matter components are coupled through Poisson's equation for the gravitational potential
\begin{equation}
\label{eq37}
\nabla ^2\phi =4\pi Ga^2(\rho _b +\rho _{dm} -\bar {\rho }(z))
\end{equation}
where $\bar {\rho }(z)=3H_0^2 \Omega _m (0)/8\pi Ga^3$ is the proper background density of the universe.

The cosmological scale factor $a(t)$ is obtained by integrating the Friedmann equation (Eq. \ref{eq4}). To complete the specification of the problem we need the ideal gas equation of state $p=(\gamma -1)e$, and the gas heating and cooling rates. When simulating the ICM, the simplest approximation is to assume $\Gamma =\Lambda =0$; i.e., no heating or cooling of the gas other than by adiabatic processes and shock heating.
Such simulations are referred to as adiabatic (despite entropy-creating shock waves), and are a reasonable first approximation to real clusters because except in the cores of clusters, the radiative cooling time is longer than a Hubble time, and gravitational heating is much larger than sources of astrophysical heating. However, as discussed in the paper by Cavaliere in this volume, there is strong evidence that the gas in cores of clusters has evolved non-adiabatically. This is revealed by the entropy profiles observed in clusters \cite{Ponman99} which deviate substantially from adiabatic predictions. In the simulations presented below, we consider radiative cooling due to thermal bremsstrahlung, and mechanical heating due to galaxy feedback, details of which are described below.

\subsection{Numerical methods overview}

A great deal of literature exists on the gravitational clustering of CDM using N-body simulations. A variety of methods have been employed including the fast grid-based methods particle-mesh (PM), and particle-particle+particle-mesh (P$^{3}$M) \cite{Efstathiou81},
spatially adaptive methods such as adaptive P$^{3}$M \cite{Couchman91},
adaptive mesh refinement \cite{Kravtsov97}, tree codes
\cite{BarnesHut86,WarrenSalmon94}, and hybrid methods such as TreePM
\cite{Xu99}. Because of the large dynamic range required,
spatially adaptive methods are favored, with Tree and TreePM methods the most widely used today. When gas dynamics is included, only certain combinations of hydrodynamics algorithms and collisionless N-body algorithms are ``natural''. Dynamic range considerations have led to two principal approaches: P$^{3}$MSPH and TreeSPH, which marry a P$^3$M or tree code for the dark matter with the Lagrangian smoothed-particle-hydrodynamics (SPH)
method \cite{Evrard88,KWH96,syw01}, and adaptive mesh refinement (AMR),
which marries PM with Eulerian finite-volume gas dynamics schemes on a spatially adaptive mesh
\cite{bn97,OShea04,Teyssier02,Kravtsov03}.
Pioneering hydrodynamic simulations using non-adaptive Eulerian grids
\cite{Kang94,Bryan94,BN98}
yielded some important insights about cluster formation and statistics, but generally have inadequate resolution to resolve their internal structure in large survey volumes. In the following we concentrate on our latest results using the AMR code \textit{Enzo} \cite{OShea04}.
The reader is also referred to the paper by Borgani et al. \cite{Borgani04} which presents recent,
high-resolution results from a large TreeSPH simulation.

\textit{Enzo} is a grid-based hybrid code (hydro + N-body) which uses the block-structured AMR algorithm of Berger {\&} Colella \cite{Berger89} to improve spatial resolution in regions of large gradients, such as in gravitationally collapsing objects. The method is attractive for cosmological applications because it: (1) is spatially- and time-adaptive, (2) uses accurate and well-tested grid-based methods for solving the hydrodynamics equations, and
(3) can be well optimized and parallelized. The central idea behind AMR is to solve the evolution equations on a grid, adding finer meshes in regions that require enhanced resolution. Mesh refinement can be continued to an arbitrary level, based on criteria involving any combination of overdensity
(dark matter and/or baryon), Jeans length, cooling time, etc., enabling us to tailor the adaptivity to the problem of interest. The code solves the following physics models: collisionless dark matter and star particles,
using the particle-mesh N-body technique \cite{Hockney88}; gravity, using FFTs on the root grid and multigrid relaxation on the subgrids; cosmic expansion; gas dynamics, using the piecewise parabolic method (PPM)\cite{Collela84};
multispecies nonequilibrium ionization and H$_{2}$ chemistry, using backward Euler time differencing \cite{Anninos97}; radiative heating and cooling, using subcycled forward Euler time differencing
\cite{Anninos94}; and a parameterized star formation/ feedback recipe \cite{Cen92}. At the present time, magnetic fields and radiation transport are being installed. \textit{Enzo} is publicly available at
{\textit{http://cosmos.ucsd.edu/enzo}}.

\subsection{Structure of nonradiative clusters: the Santa Barbara test cluster}

In Frenk et al. \cite{Frenk99} 12 groups compared the results of a variety of hydrodynamic cosmological algorithms on a standard test problem. The test problem, called the Santa Barbara cluster, was to simulate the formation of a Coma-like cluster in a standard CDM cosmology ($\Omega_m=1$)
assuming the gas is nonradiative. Groups were provided with uniform initial conditions and were asked to carry out a
``best effort'' computation, and analyze their results at z=0.5 and z=0 for a set of specified outputs. These outputs included global integrated quantities,
radial profiles, and column-integrated images. The simulations varied substantially in their spatial and mass resolution owing to algorithmic and hardware limitations. Nonetheless, the comparisons brought out which predicted quantities were robust, and which were not yet converged. In Fig.~\ref{fig8}
we show a few figures from Frenk et al. (1999) which highlight areas of agreement (top row) and disagreement (bottom row).

\begin{figure}[htbp]
\includegraphics[width=5in,height=3.33in]{fig9.eps}
\caption{The Santa Barbara test cluster. Top row, left to right: profiles of dark matter density, gas density, and gas pressure. Bottom row, left to right: profiles of gas temperature, gas entropy, and X-ray emissivity.
Different symbols correspond to different code results. From \cite{Frenk99}.}
\label{fig8}
\end{figure}

The top row shows profiles of dark matter density, baryon density, and pressure for the different codes. All are in quite good agreement for the
\textit{mechanical structure} of the cluster. The dark matter profile is well described by an NFW profile which has a central cusp \cite{NFW96}. The baryon density profiles show more dispersion, but all codes agree that the profile flattens at small radius, as observed. All codes agree extremely well on the gas pressure profile, which is not surprising, since mechanical equilibrium is easy to achieve for all methods even with limited resolution. This bodes well for the interpretation of SZE observations of clusters, since the Compton y parameter is proportional to the projected pressure distribution.
In section 5 we show results from a statistical ensemble of clusters which bear this out.

The bottom row shows the thermodynamic structure of the cluster, as well as the profile of X-ray emissivity. The temperature profiles show a lot of scatter within about one-third the virial radius (=2.7 Mpc).
Systematically, the SPH codes produce nearly isothermal cores, while the grid codes produce temperature profiles which continue to rise as r$\rightarrow $0. The origin of this discrepancy has not been resolved, but improved SPH formulations come closer to reproducing the AMR results
\cite{Ascasibar03}. This discrepancy is reflected in the entropy profiles. Again, agreement is good in the outer two-thirds of the cluster, but the profiles show a lot of dispersion in the inner one third. Discounting the codes with inadequate resolution, one finds the SPH codes produce an entropy profile which continues to fall as r$\rightarrow $0, while the grid codes show an entropy core, which is more consistent with observations \cite{Ponman99}.
The dispersion in the density and temperature profiles is amplified in the X-ray emissivity profile, since $\varepsilon _x \propto
\rho _b^2 T^{1/2}$. The different codes agree on the integrated X-ray luminosity of the cluster only to within a factor of 2. This is primarily because the density profile is quite sensitive to resolution in the core; any underestimate in the core density due to inadequate resolution is amplified by the density squared dependence of the emissivity. This suggests that quite high resolution is needed, as well as a good grasp on non-adiabatic processes operating in cluster cores, before simulations will be able to accurately predict X-ray luminosities.

\subsection{A numerical sample of adiabatic clusters: Universal Temperature Profile}

Three questions one can ask about the Santa Barbara cluster results are: 1)
is the cluster statistically representative, 2) do the results change substantially for a $\Lambda $CDM cosmology (the SB cluster assumed an EdS cosmology), and 3) what is the effect of additional baryonic physics on cluster structure? We address these questions here by summarizing results of
\textit{Enzo} simulations of the ICM in a sample of clusters in a concordance $\Lambda$CDM model drawn from a survey volume 256h$^{-1}$ Mpc on a side. Multimass initial conditions and AMR are used to achieve high spatial and mass resolution within the clusters. More details can be found in \cite{Loken02,Motl04,Motl05,
Hallman05}.

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=1.7in]{fig10.eps}
\includegraphics[width=2.5in,height=1.7in]{fig11.eps}
\caption{Left: Temperature profiles from a sample of adiabatic cluster simulations (from Loken et al. 2002). Black curves bound the 1$\sigma$ confidence band from Markevitch et al. (1998). Right: Effect of radiative cooling on temperature profiles, compared with adiabatic sample average (red line) and observational data for cooling flow clusters (triangles) and non-cooling flow clusters (squares).}
\label{fig9}
\end{figure}

Fig. \ref{fig9} shows spherically averaged temperature profiles for 13(3)
$\Lambda$CDM(SCDM)
simulated clusters at z=0 analyzed by Loken et al. (2002)
\cite{Loken02}. These were chosen from a total sample of 22(10) clusters because their 2D projected temperature maps were symmetric; the rejected non-symmetric clusters were in various states of merging. The smooth black curves bound the 1$\sigma $
confidence band from Markevitch et al. (1998)\cite{Markevitch98}
who analyzed temperature profiles from a sample of 17 symmetric X-ray clusters observed with ASCA.
When temperature is normalized by the integrated emission-weighted temperature and the radius by the virial radius, both the observed data and the simulated data collapse to a narrow band, suggesting a universal temperature profile (UTP) outside the core region.
The fit to the numerical data is $T\propto
(1+r/\alpha )^{-\delta }$, with $\alpha \sim $r$_{vir}$/1.5 and $\delta
\sim $1.6. The $\Lambda$CDM clusters and SCDM clusters exhibit the same profile,
with a suggestion of a slightly higher normalization for clusters in the critically closed model. The fit is in good agreement with observations over the range 0.2$<$r/r$_{vir}<$0.5, but diverges at small radius where the effects of non-adiabatic processes appear to be at play \cite{deGrandi02}.
The reality of the UTP was somewhat controversial when early results from XMM-Newton were showing large isothermal cores. However, the latest Chandra observations of 13 nearby, relaxed clusters have shown that the UTP provides an excellent description for temperature profiles outside
$r\sim 0.15r_{vir}$ \cite{Vikhlinin04}. Subsequent numerical studies by Ascasibar et al. \cite{Ascasibar03} and Borgani et al. \cite{Borgani04}
using SPH have found agreement with the AMR results of Loken et al. The general agreement of numerical and observational results suggests that the declining temperature profile is a natural consequence of gravitational heating of the ICM during the process of cluster formation.

\subsection{Effect of additional physics}

\begin{figure}[htbp]
\includegraphics[width=3in,height=3.5in]{fig12.eps}
\includegraphics[width=2.5in,height=2in]{fig13.eps}
\caption{Left: Columns show X-ray surface brightness, projected temperature, and Compton y-parameter for a $M=2\times 10^{15} M_{\odot}$ cluster assuming different baryonic physics. Field of view is 5 h$^{-1}$ Mpc. Right:
Corresponding spherically averaged radial temperature profiles.}
\label{fig10}
\end{figure}

Within r=0.15 r$_{vir}$, Vikhlinin et al. \cite{Vikhlinin04}
found large variation in temperature profiles, but in all cases the gas is cooler than the cluster mean. This suggests that radiative cooling is important in cluster cores,
and possibly other effects as well. It has been long known that $\sim 60$ percent of nearby, luminous X-ray clusters have central X-ray excesses,
which has been interpreted as evidence for the presence of cluster-wide cooling flows \cite{Fabian94}. More recently, Ponman et al. \cite{Ponman99}
have used X-ray observations to deduce the entropy profiles in galaxy groups and clusters.
They find an entropy floor in the cores of clusters indicative of extra,
non-gravitational heating, which they suggest is feedback from galaxy formation. It is easy to imagine cooling and heating both may be important to the thermodynamic evolution of ICM gas.

To explore the effects of additional physics on the ICM, we recomputed the entire sample of clusters changing the assumed baryonic physics, keeping initial conditions the same. Three additional samples of about 100 clusters each were simulated: The ``radiative cooling'' sample assumes no additional heating, but gas is allowed to cool due to X-ray line and bremsstrahlung emission in a 0.3 solar metallicity plasma. The ``star formation'' sample uses the same cooling, but additionally cold gas is turned into collisionless star particles at a rate $\dot {\rho }_{SF} =\varepsilon _{sf}
\frac{\rho _b }{\max (\tau _{cool} ,\tau _{dyn} )}$ , where $\varepsilon _{sf}$ is the star formation efficiency factor $\sim $0.1, and $\tau _{cool}$ and $\tau _{dyn}$ are the local cooling time and freefall time,
respectively. This locks up cold baryons in a non-X-ray emitting component,
which has been shown to have an important effect on the entropy profile of the remaining hot gas \cite{Bryan99,Voit00}. Finally, we have the ``star formation feedback'' sample, which is similar to the previous sample, except that newly formed stars return a fraction of their rest mass energy as thermal and mechanical energy. The source of this energy is high velocity winds and supernova energy from massive stars. In \textit{Enzo}, we implement this as thermal heating in every cell forming stars: $\Gamma _{sf}
=\varepsilon _{SN} \dot {\rho }_{SF} c^2$. The feedback parameter depends on the assumed stellar IMF and the explosion energy of individual supernovae. It is estimated to be in the range $10^{-6}\le \varepsilon _{SN} \le 10^{-5}$ \cite{Cen92}. We treat it as a free parameter.

Fig. \ref{fig10} shows synthetic maps of X-ray surface brightness, temperature, and Compton y-parameter for a $M=2\times 10^{15} M_{\odot}$ cluster at z=0 for the three cases indicated. The ``star formation'' case is omitted because the images are very similar to the ``star formation feedback'' case (see reference
\cite{Motl05}.) The adiabatic cluster shows that the X-ray emission is highly concentrated to the cluster core. The projected temperature distribution shows a lot of substructure, which is true for the adiabatic sample as a whole \cite{Loken02}. A complex virialization shock is toward the edge of the frame. The y-parameter is smooth, relatively symmetric, and centrally concentrated. The inclusion of radiative cooling has a strong effect on the temperature and X-ray maps, but relatively little effect on the SZE map. The significance of this is discussed in Section 5. In simulations with radiative cooling only, dense gas in merging subclusters cools to 10$^{4}$ K and is brought into the cluster core intact \cite{Motl04}. These cold lumps are visible as dark spots in the temperature map. They appear as X-ray bright features. The inclusion of star formation and energy feedback erases these cold lumps, producing maps in all three quantities that resemble slightly smoothed versions of the adiabatic maps. However, an analysis of the radial temperature profiles (Fig. \ref{fig10}) reveals important differences in the cluster core. The temperature continues to rise toward smaller radii in the adiabatic case, while it plummets to $\sim $10$^{4}$ K for the radiative cooling case. While the temperature profile looks qualitatively similar to observations of so-called cooling flow clusters, our central temperature is too low and the X-ray brightness too high. The star formation feedback case converts the cool gas into stars, and yields a temperature profile which follows the UTP at $r\ge 0.15r_{vir} $, but flattens out at smaller radii. This is consistent with the high resolution
\textit{Chandra} observations of Vikhlinin et al. \cite{Vikhlinin04}.

\section{Comparisons and predictions for X-ray and SZE surveys}

In this section we shall compare the results of numerical hydrodynamical simulations with the analytic scaling laws derived in section 3, and compare with observational data. We will see that the X-ray temperature and the integrated SZE are robust indicators of cluster mass with relatively little bias, while the X-ray luminosity is not because we cannot reliably simulate the X-ray emission from clusters.

\subsection{Analytic and numerical comparisons}

\begin{figure}
\centerline{\includegraphics[width=4in,height=2.5in]{fig14.eps}}
\caption{Comparing analytic and numerical predictions for cluster statistics.}
\label{fig11}
\end{figure}

We first ask the question how well do the simple analytic model estimates of cluster statistics agree with the results of numerical hydrodynamic simulations. This question was addressed by Bryan {\&} Norman 1998
\cite{BN98}.
Fig. \ref{fig11}
illustrates how the comparisons are made. For a given cosmological model Press-Schechter theory is used to calculate the halo mass function versus redshift (top rectangle). The observable quantities $n(T,z), N(L_x,z), n(Y,z)$
are then computed using the scaling relations presented in Section 3 for $L_x, T$ and $Y$ as a function of mass. Somewhat more work is involved deriving these results from numerical simulation (bottom rectangle). Initial conditions for the chosen cosmology are generated which specify dark matter and baryonic perturbations at the starting redshift. These perturbations are evolved using the methods described in section 4 to z=0. The particle and baryonic distributions are output at specified redshifts for analysis.
Virialized objects are located using a group-finding algorithm on the dark matter particles list. Two popular techniques are friends-of-friends
\cite{Davis85} and HOP \cite{Eisenstein99}. In the friends-of-friends algorithm, two particles are part of the same group if their separation is less than some chosen value; chains of pairs then define groups. In the HOP algorithm, an estimate of the local density is associated with every particle. Each particle is linked to its densest neighbor and on to that particle's densest neighbor until one reaches the particle which is its own densest neighbor. All particles that are traced to the same such particle define the group. Once groups are found, centers of masses for each group are computed. With these centers determined, spherically averaged profiles of dark matter density, baryon density, temperature, etc. are computed by binning the 3D data into spherical shells. For each halo, the virial radius is determined by finding the shell inside of which the mean total density (dark matter + baryons) equals the critical overdensity $\Delta_c$ (Section 3). Virial mass, X-ray luminosity, and emission weighted temperature are computed by numerical integration over the radial profiles of total density, X-ray emissivity, etc. With these quantities evaluated for each cluster in the sample, distribution functions are then computed.

\subsection{Cluster temperatures}

One of the most robust predictions of numerical simulations is the mass-temperature relation. Fig. \ref{fig12}a shows a comparison between analytic scaling relations and simulations for two cosmological models at three epochs. The simulations were carried out on fixed Eulerian grids of size 270$^{3}$ and 512$^{3}$ assuming the clusters are non-radiative. Good agreement is seen with a slight offset in normalization. Fitting Eq. \ref{eq28} to the data yields $f_T \approx 0.8.$ That the simulations reproduce the analytic scaling relations despite limited numerical resolution is a consequence of energy conservation, which is maintained to high accuracy by the numerical hydrodynamic method employed. Note that a cluster of a given mass is cooler at lower redshifts.

Fig. \ref{fig12}b shows the temperature distribution function as predicted by simulations (histograms) and Press-Schechter theory (curves) for a critically closed model (SCDM) and a low density model (OCDM). Generally,
agreement is good. Simulations underpredict the number of low temperature clusters due to resolution effects. The high temperature clusters are rare,
and thus not many are found in our small box. Despite these numerical limitations, one sees that the number of hot clusters evolves rapidly in the flat universe but evolves very little in the open universe.

Fig. \ref{fig13}a shows the predictions of simulations compared with the observational data of Henry {\&} Arnaud (1991)\cite{Henry91}.
The SCDM model is ruled out with high confidence, while the CHDM and OCDM models are marginally consistent with data. Eke, Cole {\&} Frenk (1996) \cite{ecf96} showed that with a suitable adjustment of $\sigma _{8}$, critically closed, open, and
$\Lambda $-dominated models could all reproduce the observations
(Fig. \ref{fig13}b).
This illustrates what is known as the $\Omega _{0}-\sigma _{8}$
degeneracy in cluster abundances \cite{Bahcall97}. The redshift evolution of cluster abundances can in principle break this degeneracy; however, this requires large samples of high redshift clusters with accurately measured temperatures. So far, the samples are small.
Temperatures are more difficult to measure than X-ray luminosities.
Nonetheless, available data shows mild evolution of the X-ray temperature function, consistent with a low density universe \cite{Rosati02}.

\begin{figure}[htbp]
\includegraphics[width=3in,height=2in]{fig15.eps}
\includegraphics[width=2.5in,height=2.5in]{Fig12b.eps}
\caption{Left: M-T scaling in a flat $\Omega _{m}$=1 universe (left) and an open $\Omega _{m}$=0.34 universe (right) for z=0, 0.5, and 1 (top to bottom). Symbols are measured values from hydrodynamic simulations. Lines are the scaling relations from Eq. \ref{eq28} with f$_{T}$=0.8 (from \cite{BN98}).
Right: Evolution of cumulative temperature distribution function for the two models shown in Fig 13 as predicted by theory (curves) and hydrodynamic simulations (histograms). The number of hot clusters evolves rapidly in the flat universe but evolves very little in the open universe.}
\label{fig12}
\end{figure}

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=2in]{fig17.eps}
\includegraphics[width=2.5in,height=2.5in]{fig18.eps}
\caption{Left: Comparison of z=0 cluster temperature function from Henry {\&} Arnaud (1991) with hydrodynamic simulations. SCDM model ($\Omega _{0}$=1, $\sigma _{8}$=1.05) is ruled out with high confidence, OCDM model ($\Omega _{0}$=0.34, $\sigma _{8}$=0.75) is marginally consistent with data. (from Bryan {\&} Norman 1998). Right:
Illustration of the $\Omega _{0}-\sigma _{8}$ degeneracy. Good agreement with data is found for flat, open, and $\Lambda $-dominated cosmological models with a suitable adjustment of $\sigma _{8}$. From \cite{ecf96}.}
\label{fig13}
\end{figure}

\subsection{Cluster X-ray luminosities}

The most easily measured property of an X-ray cluster is its luminosity.
However, as we shall see, this is the most difficult quantity to predict using numerical simulations. This is because the integrated X-ray luminosity of a cluster is dominated by emission from the core region, which is challenging to resolve numerically, and it is affected by heating and cooling processes which are as yet not well understood. The advent of multiscale numerical simulation techniques has ameliorated the numerical resolution difficulties. As one can see from Fig. \ref{fig8}f, the X-ray emissivity peaks at about $0.1 r_{vir}$ for the adiabatic Santa Barbara cluster. SPH and AMR simulations can now resolve this scale with ten resolution elements or more in large cosmological volumes. Fig. \ref{fig14} shows the $L_x-M$ and $L_x-T$ scaling relation derived from our large sample of adiabatic galaxy clusters simulated using AMR in a $\Lambda$CDM universe. The numerical clusters are in good agreement with the analytic virial scaling relations $L_x \propto M^{4/3}$ and $L_x \propto T^2$ without resort to resolution corrections
(cf. Bryan {\&} Norman 1998). However, the adiabatic models are in conflict with the observed scaling relations, which are $L_x \propto M^{1.8}$
and $L_x \propto T^3$ for $T >2$ keV \cite{Rosati02}.

\begin{figure}
\includegraphics[width=5in,height=2.5in]{fig19.eps}
\caption{High resolution AMR simulations of adiabatic clusters (red crosses) agree with analytic scaling predictions (red lines), but disagree with observations (black lines). Addition of radiative cooling (blue diamonds) improves agreement, but produces too many clusters with cool cores. Figures courtesy P. Motl.}
\label{fig14}
\end{figure}

The disagreement between the predictions of adiabatic simulations and observations can be taken as strong evidence of the importance of non-adiabatic processes in the cores of galaxy clusters. The effect of radiative cooling is shown by the open diamonds in Fig. \ref{fig14}. Although the $L_x-M$ and $L_x-T$ scaling steepens in the direction of observations, we view these models as unrealistic since every cluster in the sample has too much cold gas in the core, contrary to observations. The scaling relations for the ``star formation'' and ``star formation feedback'' samples are shown in Fig. \ref{fig15}a.
The conversion of cool gas into stars produces clusters whose temperature and X-ray surface brightness profiles are in better agreement with observations, and steepens the $L_x-T$
relation somewhat relative to the adiabatic clusters. The inclusion of supernova heating has a rather minor effect when compared to the magnitude of the change including star formation. This is best illustrated in Fig. \ref{fig15}b,
which shows the scatter of central entropy versus central temperature for the adiabatic, star formation, and star formation feedback cluster samples.
An analysis of a sample of clusters by Ponman et al. (1999)
\cite{Ponman99} revealed the existence of an ``entropy floor''. This feature has been interpreted as evidence of galaxy formation feedback which increases gas entropy. The same data has been explained as the result of radiative cooling \cite{Bryan99,Voit00}
which locks up low entropy gas in stars where it does not contribute to X-ray emission. The magnitude of the entropy floor strongly suggests the heating explanation. The failure of star formation feedback simulations to exhibit the entropy floor may be due to limited mass resolution. The galaxy mass function is not well sampled in these simulations; indeed, only the central dominant galaxy and one or two of the most massive galaxies are present in these simulations. Perhaps higher resolution simulations will improve agreement. AGN heating is another source of energy input that may be important, especially in the cores of clusters \cite{Ruszkowski02}.
Numerical simulations incorporating these effects are in their infancy, and certainly not at the stage where large ensembles can be simulated for statistical analysis.

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=2.5in]{fig20.eps}
\includegraphics[width=2.5in,height=2.5in]{fig21.eps}
\caption{Left: Effect of baryonic physics on the L-T relation for three AMR cluster samples: adiabatic (crosses), star formation (triangles), and star formation feedback (squares). Right: Central entropy versus central temperature for the cluster samples in Fig \ref{fig12}. The dashed line is the observed ``entropy floor''. Figures courtesy P. Motl.}
\label{fig15}
\end{figure}

\subsection{Prospects for SZE cluster surveys}

The sensitivity of X-ray luminosity to numerical resolution and baryonic processes motivates us to look for other more robust indicators of a cluster's mass. Temperature is such an indicator, however this is more difficult to measure than X-ray luminosity even at low redshifts. At high redshifts the task becomes even more difficult because of the severe $(1+z)^{-4}$
surface brightness dimming of the X-ray flux. In this section we explore the thermal SZE effect as a mass indicator based on our four catalogs of simulated galaxy clusters. Based on these models, we find that the integrated SZE $y_{500}$ is a less biased indicator of cluster mass than either the X-ray luminosity or temperature, and shows far less scatter than the central value of the SZE intensity change $y_0$. More details can be found in references
\cite{Motl05,Hallman05}.

\begin{figure}[htbp]
\includegraphics[width=3in,height=2in]{fig22.eps}
\includegraphics[width=2in,height=2in]{fig23.eps}
\caption{Left: The ``lightcurve'' for the central value of the Compton parameter,
$y_0$, obtained from tracking one particular halo from a redshift of 4 to the present epoch. Major mergers can boost $y_0$ by a factor of 10. Right:
Projected y parameter distribution of cluster at the epochs marked by vertical lines in the lightcurve. Figures courtesy P. Motl.}
\label{fig16}
\end{figure}

As has been discussed elsewhere in this volume (Rephaeli, Birkinshaw), the thermal SZE is an attractive cosmological probe because it is redshift independent. The strength of the SZE is proportional to the Compton parameter, y, which for non-relativistic electrons is essentially the integral of the gas pressure through the cluster
\begin{equation}
y=\int {\frac{k_B T}{m_e c^2}} \sigma _T n_e d\ell \propto \int {nTd\ell .}
\end{equation}
The central value of the Compton y parameter we refer to as $y_0$. We define the integrated SZE $y_{500}$ as the area integral of the y parameter out to
$r_{500}$, the radius inside of which the mean density is 500 times the critical density:
\begin{equation}
y_{500} =2\pi \int\limits_0^{r_{500} } {y(r)rdr.}
\end{equation}
The detectability of a cluster is given by its SZ cross section (Section 3),
which is essentially $y_{500} /d_A^2 \propto (1+z)^{-2}$. This is a far more favorable redshift dependence than X-rays provide.

Fig. \ref{fig16}a shows the redshift evolution of $y_0$ for the most massive cluster in our sample. As can be seen, $y_0$ exhibits a secular increase as the cluster potential deepens, but is boosted by up to a factor of $\sim $20(2) during major(minor) merger events. The duration of these events is of order the dynamical time $\sim $1-2 Gyr. The effect of mergers induces considerable scatter into scaling between $y_0$ and the enclosed mass $M_{500}$ in our sample of clusters at z=0 (Fig. \ref{fig17}a). By contrast, $y_{500}$
shows a much tighter correlation (Fig. \ref{fig17}b). The reason for this is illustrated in the lower two panels of Fig. \ref{fig17} where we plot the central value of the gas pressure $p_0$ and the volume averaged pressure $p_{500}
=\frac{3}{4\pi r_{500}^3 }\int\limits_0^{r_{500} } {p(\vec {x})d^3\vec {x}}
$. The central pressure exhibits large scatter due to the presence of shock waves induced by mergers. However, the volume averaged pressure exhibits relatively little scatter. This is a consequence of virial equilibrium and tells us that the clusters are approximately in equilibrium within $r_{500}$.

Fitting the data to a power law of the form
\begin{equation}
y_{500} =A\left[ {\frac{M_{500} }{10^{14}M_{\odot}}} \right]^\alpha
\end{equation}
for each of our 4 catalogs, we find $\alpha \sim 1.6, \sigma_{\alpha}\sim 0.025$
for the adiabatic, star formation, and star formation feedback samples, and
$\alpha \sim 1.7, \sigma_{\alpha}\sim 0.03$
for the radiative cooling sample. The scaling exponent is consistent with the findings of da Silva et al (2004) \cite{daSilva04}.
Ignoring the radiative cooling only runs as unrealistic, we find that the scaling is relatively insensitive to baryonic physics. This is both reassuring and understandable in that regardless of the thermodynamics of the gas,
hydrostatic equilibrium is maintained to a good approximation. By looking back through our catalogs in redshift, we find that the coefficient A is independent of redshift.

\begin{figure}
\centerline{\includegraphics[width=5in,height=4in]{Fig17b.eps}}
\caption{Upper: The scaling relations between $y_0$ and $y_{500}$ and the total cluster mass within the same radius at z=0 for the star formation with feedback cluster sample. Two randomly chosen, orthogonal projections for each cluster are plotted as individual points and the catalog contains $\sim 100$ clusters at this epoch in the mass range $1 \times 10^{14} M_{\odot} \leq M_{200} \leq 2 \times 10^{15} M_{\odot}$. The best fit relations are plotted as solid lines. Lower: Central pressure and pressure integrated inside sphere of radius
$r_{500}$ plotted against cluster total mass. From \cite{Motl05}.}
\label{fig17}
\end{figure}

\subsection{Cluster mass estimates compared}

To assess the systematic biases and relative scatter of various means of estimating cluster masses from X-ray and SZE data, we ``observed'' our four cluster samples and analyzed the resulting synthetic images in the same way as observations. Our goal was to find both the best cluster mass estimator and best method of analysis. These were defined as the combination which produces the least bias and smallest scatter between inferred cluster mass and actual (simulated) mass. Here we merely summarize our findings; for details the reader is referred to \cite{Hallman05}.

Cluster masses can be obtained from X-ray and thermal SZE observations in several ways. The most widely used is the isothermal beta model, wherein it is assumed the electron number density is spherically symmetric and follows

\begin{equation}\label{eq41}
n_e (r)=n_{e0} \left[ {1+\left( {\frac{r}{r_c }} \right)^2} \right]^{-3\beta
/2},
\end{equation}

where $n_{e0}$ is the central electron density. Approximating the gas as isothermal with average temperature $\langle T \rangle$ within the fitting radius, then the X-ray surface brightness is
\begin{equation}\label{eq42}
S_X (r)=S_{X0} \left[ {1+\left( {\frac{r}{r_c }} \right)^2}
\right]^{\frac{1}{2}-3\beta }
\end{equation}
where $S_{X0} \propto n_{e0}^2 \left\langle T \right\rangle ^{\frac{1}{2}}$.
Similarly for the SZE, a beta model density distribution results in a projected radial distribution for the Compton y parameter
\begin{equation}\label{eq43}
y(r)=y_0 \left[ {1+\left( {\frac{r}{r_c }} \right)^2}
\right]^{\frac{1}{2}-\frac{3\beta }{2}}
\end{equation}
where $y_{0} \propto n_{e0} \left\langle T \right\rangle.$

By fitting the observed profiles of $S_x(r)$ and $y(r)$ one obtains
$\beta$ and $r_c$, the core radius. With $\left\langle T \right\rangle$
measured observationally, $n_{e0}$ can then be calculated.
One then integrates Eq. \ref{eq41} to find the gas mass within the fitting radius $r_<$. The cluster dynamical mass is then $M_{dyn} (r_< )=M_{gas} (r_<
)/f_b (r_< ),$ where $f_b$ is the baryon fraction which may in general be different from the cosmic mean $\Omega_b/\Omega_m$ depending upon the radius. Henceforth we will refer to mass estimates made in this way as X-ray-ISO and SZE-ISO.

Recently it has been shown both in simulations (Loken et al. 2002, Section 4)
and in X-ray observations (Vikhlinin et al. 2005) that clusters are not isothermal at large radii, but follow a universal temperature profile (UTP)
\begin{equation}\label{eq43a}
T(r)=\left\langle T \right\rangle _{500} \left[ {1+\left( {\frac{r}{\alpha r_{500} }} \right)^2} \right]^{-\delta }
\end{equation}
where $\langle T \rangle_{500}$ is the average temperature inside
$r_{500}$, and $\alpha$ and $\delta$ are fitting parameters determined from a large sample of clusters. Improved mass estimates can be obtained by geometric deprojection of the X-ray and SZE profiles if one knows the temperature of each radial shell. This is provided by the UTP. For example, the X-ray surface brightness can be deprojected to yield the X-ray emissivity in each spherical shell (e.g., \cite{Buote00}).
Knowing the temperature profile, one can obtain the mass in each shell. A similar technique can be applied to the SZE profile. By summing over shells,
one obtains the gas mass within the fitting radius. Mass estimates obtained in this way we refer to as X-ray-UTP and SZE-UTP.

\begin{figure}
\centerline{\includegraphics[width=4in,height=2.5in]{fig25.eps}}
\caption{Comparison of median values and scatter of gas mass estimates inside
$r_{500}$ for full SFF cluster sample (triangles) and cleaned SFF sample
(diamonds) at z=0 for each of four methods: UTP-X-ray (U-X), UTP-SZE (U-SZ),
isothermal X-ray (I-X), and isothermal SZE (I-SZ) as described in the text.
From \cite{Hallman05}.}
\label{fig18}
\end{figure}

Fig. \ref{fig18} shows the ratio of the measured mass to the actual mass for the star formation feedback catalog of simulated clusters for the four methods described above. The triangles are the full sample, whereas the diamonds are for samples which have been cleaned of highly distorted clusters resulting from recent mergers. The error bars enclose the 80{\%} confidence range. As can be seen, cleaning the sample reduces the scatter considerably. Among the different methods, the X-ray measurements yield the smallest scatter, but overestimate the cluster masses by 5-10{\%}. Conversely, the SZE-UTP measurements yield unbiased estimates of the cluster mass, with somewhat more scatter. As shown in \cite{Hallman05}, the scatter in the SZE estimates decreases as the fitting radius is increased to $r_{200}$, while no improvement is seen in the X-ray estimates. This is to be expected since the X-ray emission is heavily core-weighted, while the SZE samples larger radii.

\subsection{Conclusions}

We have seen that galaxy clusters are sensitive cosmological probes provided their masses can be measured with precision. Both analytic estimates and numerical simulations show that the evolution of their comoving number density is sensitive to cosmology. With improvements in X-ray observations and impending large area surveys to detect clusters via the SZE, it is paramount to assess the accuracy to which cluster masses can be obtained observationally. Based on our catalogs of simulated clusters using adaptive mesh refinement, we find that gas masses can be measured to $\sim $10{\%}
accuracy with 80{\%} confidence. Our study ignores instrumental or other observational effects. These limits in precision are a direct result of the deviation of the simulated clusters from simple assumptions about their physical and thermodynamic properties, dynamical state, and sphericity.
Comparing a variety of methods, we find that SZE methods assuming a UTP produce the smallest scatter when estimating masses from a raw sample of clusters. Cleaning the cluster sample of obvious mergers does not improve the SZE estimates much, but improves the X-ray estimates substantially. As a practical matter, we find SZE methods are superior for mass estimation of large samples of clusters out to high redshift. This is particularly true if the cutoff radius is the virial radius, as this has the effect of smoothing out any boosting effects in the cluster core due to mergers.

Comparing mass estimates from our four catalogs, we find that our conclusions are insensitive to assumed baryonic physics, except for the cooling sample, which yields unrealistic-looking clusters. Mass estimates derived from the cooling sample are systematically high (50-100{\%}) despite excising the overluminous X-ray core. Reasons for this are discussed in detail in reference \cite{Hallman05}.
We conclude that cool core clusters are poor candidates for precision mass estimation, in disagreement with previous studies \cite{Allen98}.

\acknowledgments The author is indebted to his collaborators Greg Bryan,
Jack Burns, Eric Hallman, Chris Loken, and Patrick Motl whose results, both published and unpublished, are presented here. Simulations were performed at the National Center for Supercomputing Applications of the University of Illinois, Urbana-Champaign with support from NSF grants ASC-9318185,
AST-09803137.

\end{document}
\endinput
\title{One-way quantum computation with four-dimensional photonic qudits}

\begin{abstract}

We consider the possibility of performing linear optical quantum computation making use of extra photonic degrees of freedom. In particular we focus on the case where we use photons as quadbits,
4-dimensional photonic qudits. The basic 2-quadbit cluster state is a hyper-entangled state across polarization and two spatial mode degrees of freedom. We examine the non-deterministic methods whereby such states can be created from single photons and/or Bell pairs, and then give some mechanisms for performing higher-dimensional fusion gates.

\end{abstract}

\section{Introduction}
\label{Intro} Optical quantum computation is a strong candidate for a scalable quantum computer. Photons have low decoherence rates, and high fidelity optical components are readily available. In this article we focus on the linear optical quantum computation (LOQC) paradigm, for which the resource overheads of the original LOQC proposal \cite{Knill01} have been greatly reduced by making use \cite{Nielsen04,
Browne05,Kok,kieling06,kieling07,Gross,Rohde,
Dawson06,Dawson062,Gilbert,Zhang,varnava} of the one-way quantum computation model \cite{Briegel01,Raussendorf01}.

Significant hurdles to practical LOQC remain, however. At present the primary obstacle is a deterministic source of photons. Much progress has been made along these lines
\cite{Walther05,everyone}, but it is clear that there is still a long way to go. Particularly exciting is the possibility of creating ``on-demand'' entangled pairs of photons \cite{toshiba,
gershoni}, which obviate the need for initially creating such entangled pairs from single photons \cite{pan}. The investigations of this paper are based around an assumption that at some time in the near future efficient deterministic sources of either single photons or entangled photon pairs will become available.

It is not always obvious how to compare the resource requirements of various different proposals for implementing LOQC within the cluster state paradigm (e.g. how many single photon sources,
memory units and feedforward steps is an entangled pair source
``worth''?). Since the primary difficulties for LOQC relate to sources and detectors, it is clear that schemes which reduce the number of photons actually used in an implementation are desirable \footnote{In general, within the circuit model, the results of \cite{Muthukrishnan} suggest that quantum computation with quadbits can be expected to result in a space saving of at least $O(\log_2 d)$, and a time saving of at least $O(\log_2 d)^2$. The extent to which such savings translate into the cluster state model is largely unexplored --- they will depend on optimal decompositions of qudit cluster circuits for general two-qudit unitary operations. Such optimal decompositions are not completely characterized for even the qubit case yet.}. A travelling photonic wavepacket is in principle a multi-mode creature, and thus can be treated as a $d$-dimensional quantum system (a ``qu$d$it''). There is a $d$-dimensional version of cluster state computing \cite{Zhou03, BillHall}, and one purpose of this paper is to explore procedures whereby such
$d$-dimensional clusters can be created. The second motive is to examine some basic ``initial state'' resource tradeoffs, such as:
``how many Bell pairs does it take to make a hyper-entangled state''.

For concreteness we focus on the \emph{quadbit} case -
specifically, we treat a single photon as a four-dimensional quantum system; using the two polarization states of two different spatial modes to encode the four levels.

\section{Quadbit cluster states}
\label{Sec:01}
\subsection{General quadbit cluster states}
\label{Sec:0101}

In this section we review the features of quadbit cluster states we shall make use of - a pedagogical overview of the higher-dimensional cluster state computing can be found in
\cite{BillHall}.

We label the computational basis states $\{|\bar0 \rangle ,|\bar1 \rangle,
|\bar2 \rangle ,|\bar3 \rangle \}$ (use of the overbar is to prevent confusion with 0 and 1 photon Fock states). In terms of these we can define the quadbit version of a Hadamard rotation, which rotates the computational basis state $|\bar i\rangle$ to $|+_i\rangle$ ($i=0,1,2,3$), where
\begin{eqnarray}
\label{five:eq03} | +_{i} \rangle &=& {1\over 2} ( |\bar0 \rangle
+ {\rm e}^{{\rm i} {i \pi \over 2}} |\bar1 \rangle + {\rm e}^{{\rm i} \,i \pi}|\bar2 \rangle + {\rm e}^{{\rm i} {3 i \pi
\over 2}} |\bar3 \rangle).
\end{eqnarray}

A 2-quadbit cluster state $|QdC_2\rangle$ is then given by the superposition
\[|QdC_2\rangle={1 \over 2}\sum_{i=0}^3|\bar i\rangle|+_i\rangle,\] which should be compared with the equivalent 2-qubit cluster state
$|C_2\rangle=(|0\rangle|+\rangle+|1\rangle|-\rangle)/\sqrt{2}$. In the case of qubits a two-qubit (non-destructive) parity gate operation would fuse
\cite{Browne05} two 2-qubit clusters into the state
$|C_3\rangle=(|+\rangle|00\rangle|+\rangle+|-\rangle|11\rangle|-\rangle)/\sqrt{2}$, and repeating such fusion operations allows for the growth of arbitrary cluster states (the redundant encoding of the central qubit is easily removed by a measurement in the $|\pm\rangle$ basis, yielding the 3-qubit cluster state as claimed). Similarly, in the quadbit cluster case arbitrary quadbit clusters can be grown using a quadbit fusion operation. Applied to two 2-quadbit clusters such a fusion would achieve the state
$|QdC_3\rangle=\sum_{i=0}^3|+_i\rangle|\bar i \bar i\rangle|+_i\rangle/2$.

\subsection{Optical quadbit cluster states}
\label{Sec:0102}

We define a quadbit single photon quantum state in two polarization/spatial modes as follows:
\begin{eqnarray}
\label{five:eq01} |\bar0 \rangle &\equiv& |H \rangle_1 \,
,~|\bar1 \rangle \equiv |V \rangle_1 \, , ~ |\bar2 \rangle \equiv
|H \rangle_{2} \, ,~|\bar3 \rangle \equiv |V \rangle_{2} \, ,~~~~~
\end{eqnarray}
where $H (V)$ denotes horizontal (vertical) polarization, and the subindex $1 (2)$ denotes spatial mode $k_1$($k_2$).

Consider now a so-called hyper-entangled state (HES)
\cite{kwiat01}, which is a two-photon state entangled in both polarization and spatial modes. Two-photon HES's can be generated by spontaneous parametric down-conversion \cite{kwiat}.
As with generation of single photons, such a mechanism of HES production is not scalable. In Section \ref{Sec:0201}, we will consider scalable production of HES's given deterministic single photon sources or entangled pairs. It is possible to represent an HES as a product of Bell states, with a virtual tensor product structure between the spatial and polarization modes, for example:
\begin{eqnarray}
\label{five:eq05}
|\Phi^{+}_{\rm HES} \rangle &=& {1 \over 2} ( | H \rangle | H
\rangle + |V \rangle |V \rangle)\otimes(|1\rangle|3\rangle +
|2\rangle|4\rangle).~~~~~
\end{eqnarray}
(We will always use the $\otimes$ symbol to refer to this virtual tensor product of spatial modes and polarizations). Using the identification in Eq.~(\ref{five:eq01}), we see that
$|\Phi^{+}_{\rm HES} \rangle$ is equal to
\begin{eqnarray*}
\label{five:eq06} |\Phi^{+}_{\rm HES} \rangle &=& {1\over 2}
(|H\rangle_1|H\rangle_3+|V\rangle_1|V\rangle_3+|H\rangle_2|H\rangle_4+|V\rangle_2|V\rangle_4)
\\&=& {1\over 2} (
|\bar0 \rangle |\bar0 \rangle + |\bar1 \rangle |\bar1 \rangle +
|\bar2 \rangle |\bar2 \rangle + |\bar3 \rangle |\bar3 \rangle).
\end{eqnarray*}
As any single mode unitary operation can be implemented with linear optics \cite{reck}, a simple circuit can be constructed which rotates the quadbit in modes 3 and 4 to yield the optical 2-quadbit cluster state $|QdC_2\rangle$ defined above.

Consider attempting to fuse two 2-quadbit clusters, the first in modes (1,2;3,4) (as in Eq.(\ref{five:eq05})), the second in modes
(5,6;7,8). The procedure required to fuse the quadbit in spatial modes 1,2 with that in 5,6 is a gate which (when successful)
performs a projective measurement of the form:
\[
|HH\rangle_{1\,5}\langle HH|+|VV\rangle_{1\,5}\langle VV|+|HH\rangle_{2\,6}\langle HH|+|VV\rangle_{2\,6}\langle VV|.
\]
That is, a successful measurement should reveal ``the photons were in corresponding spatial modes with the same polarization'',
but should not reveal in which spatial modes and with what polarization. In section \ref{Sec:03} we will show that such a fusion is possible, although we have only found methods of doing it that make use of ancillary systems, and for which the success probability strongly depends on the nature of the ancillas available.

\section{Generation of quadbit cluster states}
\label{Sec:02}

Before discussing possible fusion mechanisms, we turn to examining some
``initial state resource tradeoffs''. This is because, as in the case of single photons, parametric downconversion is not a suitable source for scalable LOQC. Therefore we may well need to generate deterministic HES's from a deterministic source of either single photons, Bell pairs or GHZ states. Whether the constructions we give are optimal (or even close to being so) we cannot determine. Thus the procedures we present should be seen as simply giving upper bounds on the resources required. Also,
because the most efficient fusion gate we will present for quadbit clusters destroys the photons involved (much like Type-II fusion for qubits) we will need to look at mechanisms for generating an initial resource of 3 and 4 quadbit cluster states.

Basic notation for the figures, and a brief outline of the operation of the fundamental optical components is set out in Appendix A.

\subsection{General procedure for HES generation}
\label{Sec:0201}

The general circuit we present (Fig.\ref{fig:J2}) is built from two copies of a sub-circuit we label $J_1$, and we first explain the operation of this circuit.

The circuit $J_1$ consists of three beam splitters (BSs) with two vacuum inputs. Consider the case where a Bell state $(|H\rangle_1
|H\rangle_2+|V\rangle_1|V\rangle_2)/\sqrt{2}$ is input into $J_1$. The first BS creates a bunched two-photon state in modes 1 and 2, and then two vacuum inputs are applied from modes $1'$ and $2'$ with two regular BSs. After the circuit $J_1$, the state of two photons in mode 1, $1'$, $2$, and $2'$ is equal to
\begin{eqnarray}
\label{six:eq02} &&|M \rangle_{121'2'} = {1 \over 4} ( | H \rangle
| H \rangle + |V \rangle |V \rangle) \nonumber \\ &&~~~~~~~\otimes
\sum_{j=1}^{2}\big[{\rm e}^{{\rm i}j\pi}(|j\rangle|j\rangle +|j'\rangle|j'\rangle +
\sqrt{2} |j\rangle|j'\rangle ) \big].~~~~~~~
\end{eqnarray}
It is a combination of four states of bunched photon pairs in a spatial mode ($|j\rangle|j\rangle$ and $|j'\rangle|j'\rangle$)
and two anti-bunched states in two different spatial modes ($|j\rangle|j'\rangle$).

\begin{center}
\begin{figure}[h]
\resizebox{!}{6.5cm} {\includegraphics{J2} } \caption{(color online) \label{fig:J2} (a) Circuit $J_1$ (b) Circuit $J_2$ for a hyper-entangled state from four entangled photons}
\end{figure}
\end{center}

We turn now to the full circuit $J_2$ depicted in Figure
\ref{fig:J2} (b). At the centre of the circuit is a source $S_2$
which can be either single photons, Bell pairs or a 4-photon GHZ state. This source is then fed into two copies of the $J_1$ gate,
the outputs of which impinge on 50:50 beam splitters as shown. It is easiest to begin with the case that the source consists of two Bell pairs.

The initial state of the two Bell pairs $|{\Phi^{+}} \rangle_{1\,2}
|{\Phi^{+}} \rangle_{3\,4}$ is
\begin{eqnarray}
\label{six:eq04} && {1 \over 2} ( | H \rangle_1 | H \rangle_2 +
|V \rangle_1 |V\rangle_2 )( | H \rangle_3 | H \rangle_4 + |V
\rangle_3 |V\rangle_4 ).
\end{eqnarray}
According to Eq. (\ref{six:eq02}), the state after the two $J_1$
circuits is equal to
\begin{eqnarray}
\label{six:eq045} && \hspace{-1cm} |M \rangle_{1\,2\,1'\,2'} |M
\rangle_{3\,4\,3'\,4'}\nonumber \\
&=& {1 \over 16}( | H \rangle | H \rangle + |V \rangle |V
\rangle)( | H \rangle | H \rangle + |V \rangle |V \rangle)
\nonumber \\&& ~~~\otimes \sum_{j=1}^{2}\big[{\rm e}^{{\rm i}j\pi}(|j\rangle|j\rangle +|j'\rangle|j'\rangle + \sqrt{2}
|j\rangle|j'\rangle ) \big] \nonumber \\&& ~~~~~~
\sum_{k=3}^{4}\big[{\rm e}^{{\rm i}k\pi}(|k\rangle|k\rangle
+|k'\rangle|k'\rangle + \sqrt{2} |k\rangle|k'\rangle ) \big].~~~
\end{eqnarray}

At the end of the $J_1$ circuits, two BSs are applied in modes
$1',4'$ and $2',3'$, after which detectors are located. Successful operation occurs when two identically polarized photons are detected in modes $1', 4'$ or $2', 3'$ respectively, and the success probability of the detection pattern is 1/16. To see how this works, note that it is the components of the state in Eq.
(\ref{six:eq045}) which consist of two bunched photons
($|j'\rangle|j'\rangle$ and $|k'\rangle|k'\rangle$) that can yield successful detection : the anti-bunched photonic states
($|j\rangle|j'\rangle$ and $|k\rangle|k'\rangle$) result in destructive interference. For example, if we detect two horizontal photons in modes $1'$ and $4'$ but nothing in modes
$2'$ and $3'$, the outcome state is
\begin{eqnarray}
\label{six:eq06} | \psi'_{\rm HES} \rangle &=& {1 \over 2
\sqrt{2}} ( | H
\rangle | H \rangle + |V \rangle |V \rangle) \nonumber \\
&& \otimes (|1\rangle|1\rangle - |2\rangle|2\rangle +
|3\rangle|3\rangle - |4\rangle|4\rangle).~~~~~
\end{eqnarray}
This state is, up to a linear optical transformation (in this case two BSs in mode 1 and 2 and mode 3 and 4), a hyper-entangled state.

It is interesting to note that the failure outcomes can still yield photons in useful states. In particular the failure outcome where only the vacuum is detected leaves all the photons still in two Bell pairs; this occurs with probability 1/16, and obviously the gate can then simply be repeated. This suggests the overall success probability is essentially 1/8. Some of the detection patterns, while not yielding an HES, do still leave two of the photons in a Bell pair, which could be recycled.

We are also able to use for the source a four-qubit GHZ state of the form $(|HHHH\rangle_{1234}+|VVVV\rangle_{1234})/\sqrt{2}$ rather than two Bell pairs; this yields a higher success probability. This also has the advantage that in this case we need not assume the four detectors are polarization sensitive : they need only count numbers of photons at the output of the primed modes. Upon successful detection, when two photons are detected in any two spatial modes, the state in modes 1 to 4 becomes a HES with a success probability 3/16, which is higher than the case of two Bell pairs. Interestingly, no photon detection yields a 4-photon entangled state such as $(|\Phi^{+}\rangle_{12}
|\Phi^{+}\rangle_{34}+|\Psi^{+}\rangle_{12} |\Psi^{+}\rangle_{34})/\sqrt{2}$.

Finally, if we wish to create a HES ballistically from single photons, then we can replace the two Bell pairs input at the source $S_2$ by two copies of the circuit for generating a Bell pair from 4 single photons (Figure \ref{NewS01} in Appendix
\ref{Append03}). In this case we find that the success probability is $1/16^3$.

\subsection{Generating larger quadbit cluster states}
\label{Sec:0202}
\subsubsection{3 quadbit cluster state}
\label{Sec:020201}
\begin{figure}[h]
\centering
\includegraphics[width= 9cm]{MQF02} \vspace{-1cm}
\caption{ \label{MQF02} (color online) Circuit $K_1$ for a 3 quadbit cluster state from two HESs}
\end{figure}

To create a 3 quadbit cluster state, we use the ``modified quantum filter'' (MQF) scheme we present in Appendix \ref{Append0202}.
This circuit implements a parity gate between the input photons in a manner which does not destroy the input photons when it is successful, and moreover is unaffected by situations wherein one of the input modes is empty.

Our circuit for generating a 3 quadbit cluster from two HES's is depicted in Figure \ref{MQF02}. $S_1$ and $S_2$ are sources of initial HESs each in $|\Phi^{+}_{\rm HES} \rangle$. Note that there is \emph{one} photon spread across spatial modes (3,4) and one photon spread across spatial modes (5,6) - the circuit is a two-photon gate, and only one photon will be detected - this is reminiscent of fusing together two Bell pairs by Type-I fusion to create a 3 qubit GHZ (cluster) state, and in fact this gate does act as a Type-I fusion gate for quadbits. After a successful operation in modes 3 and 5 of the MQF, the outcome state is equal to
\begin{eqnarray}
\label{seven:eq02} && {\sqrt{2} \over 6} \big[ |H\rangle_1
|H\rangle_3 |H\rangle_5 |H\rangle_7 + |V\rangle_1 |V\rangle_3
|V\rangle_5 |V\rangle_7 \nonumber \\ &&~+ 2 (|H\rangle_2
|H\rangle_4 + |V\rangle_2 |V\rangle_4) (|H\rangle_6
|H\rangle_8 + |V\rangle_6 |V\rangle_8 ) \big], \nonumber \\
\end{eqnarray}
(the measurement operator for operation of the MQF's is presented in Eq. (\ref{QF04}) in Appendix \ref{Append0202}). Note that in Eq.
(\ref{seven:eq02}), the first two terms contain a photon in mode 3 and the other terms also have a photon in mode 6 (these are the modes which will be detected). After a polarizing beam splitter
(PBS) between modes 4 and 6, two $R_{\pi/4}$s, and a BS in mode 3 and 6, detection of a photon in either mode 3 or 6 results in successful gate operation. The outcome state results from only four terms in Eq. (\ref{seven:eq02}), such as $|H\rangle_1
|H\rangle_3 |H\rangle_5 |H\rangle_7 $, $|V\rangle_1 |V\rangle_3
|V\rangle_5 |V\rangle_7$, $|H\rangle_2 |H\rangle_4 |H\rangle_6
|H\rangle_8$, and $|V\rangle_2 |V\rangle_4 |V\rangle_6
|V\rangle_8$. The extra beam splitter (${\rm BS}_{3/4}$) with vacuum input in mode 4 balances amplitudes in the final state.
For example, after a successful detection in the MQF, the detection of a vertical photon in mode 3 and vacuum in modes 6 and $4$ yields a final state
\begin{eqnarray}
\label{seven:eq03} |{ QdC'_3} \rangle &=& {1\over 2} \big(
|H\rangle_1 |H\rangle_5 |H\rangle_7 - |V\rangle_1 |V\rangle_5
|V\rangle_7 \nonumber \\ &&~~~+ |H\rangle_2 |H\rangle_4
|H\rangle_8 - |V\rangle_2
|V\rangle_4 |V\rangle_8 \big), \nonumber \\
&=& {1\over 2} \big( |\bar0 \rangle |\bar0 \rangle |\bar0 \rangle
- |\bar1 \rangle |\bar1 \rangle |\bar1 \rangle + |\bar2 \rangle
|\bar2 \rangle |\bar2 \rangle - |\bar3 \rangle |\bar3 \rangle
|\bar3 \rangle \big),\nonumber \\
\end{eqnarray}
where the set $\{|\bar0 \rangle, |\bar1 \rangle, |\bar2 \rangle,
|\bar3 \rangle\}$ is defined by $ \{|H\rangle_1$, $|V\rangle_1$,
$|H\rangle_2$, $|V\rangle_2\}$, $ \{|H\rangle_5$, $|V\rangle_5$,
$|H\rangle_4$, $|V\rangle_4\}$, and $ \{|H\rangle_7$,
$|V\rangle_7$, $|H\rangle_8$, $|V\rangle_8\}$.

When the generalized quadbit Hadamard operation and a phase shift are employed on a vertical photon in mode 5 and 4, the outcome state is equivalent to a 3-quadbit cluster state $|{
QdC_3}\rangle$ in Section \ref{Sec:0101}. Therefore, we obtain a three-quadbit cluster state in modes 1,2,4,5,7, and 8 with success probability 1/256.

\begin{figure}[h]
\centering
\includegraphics[width= 9cm]{MQF03} \vspace{-1.5cm}
\caption{ \label{7th} (color online) Circuit $K_2$ for a 4-quadbit cluster state from 2 hyper-entangled pairs}
\end{figure}

\subsubsection{4 quadbit cluster state}
\label{Sec:020202} A slight modification of the circuit in the previous subsection can easily build a 4-quadbit cluster state.
We start from the intermediate state in Eq. (\ref{seven:eq02})
(see Figure \ref{7th}). Because the state does not contain an input of vacuum states in mode 4 and 6, the original QF can be used (see Appendix \ref{Append0202}). When the original QF is successfully applied in modes 4 and 6 to the outcome in Eq.
(\ref{seven:eq02}), the final state is equal to
\begin{eqnarray}
\label{seven:eq05} |{\rm QdC'_4}\rangle &=& {1\over 2} \big(
|H\rangle_1 |H\rangle_3 |H\rangle_5 |H\rangle_7 + |V\rangle_1
|V\rangle_3 |V\rangle_5 |V\rangle_7 \nonumber \\ && ~+
|H\rangle_2 |H\rangle_4 |H\rangle_6 |H\rangle_8 + |V\rangle_2
|V\rangle_4 |V\rangle_6 |V\rangle_8 \big).\nonumber \\
\end{eqnarray}
This is equivalent to
\begin{eqnarray}
\label{seven:eq055} |{\rm QdC_4}\rangle &=& {1\over 2}
\sum^{3}_{d=0} | +_{d} \rangle | \bar{d} \rangle | +_{d} \rangle
| +_{d} \rangle,
\end{eqnarray}
up to a local operation on the second photon. Note this is a 4-quadbit state of ``star'' form - i.e.\ a central quadbit with three leaves, and thus is useful for creating quadbit clusters with nontrivial topology.

From the resource point of view, two hyper-entangled states and six single photons (four horizontal and two vertical photons) are used to create such a 4 quadbit cluster with success probability 1/1024.

\section{Fusing quadbit cluster states}
\label{Sec:03}

\begin{center}
\begin{figure}[h]
\centering
\includegraphics[width=11cm]{T203} \caption{ \label{8th}
(color online) Circuit $K_3$ of a Type2-like fusion gate on two hyper-entangled pairs}
\end{figure}
\end{center}

In order to perform optical quadbit one-way quantum computation,
we require a procedure for building large multi-quadbit cluster states. The Type-I style gate (of section \ref{Sec:020201}) could be used; however its success probability is very low.

In Figure \ref{8th}, we present a Type-II-like fusion gate between two quadbit cluster states. The total circuit is comprised of two sub-circuits we label $T_3$, consisting of two four-port interferometers. The operation of the $T_3$ gate is discussed in Appendix \ref{Append0201}. The basic effect of gate
$T_3$ is to destroy the spatial mode information carried by the photons while leaving their polarization information intact.

As shown in Figure \ref{8th}, the initial state is prepared in
$|\Phi^{+}_{\rm HES} \rangle_{1\,2\,3\,4} |\Phi^{+}_{\rm HES}
\rangle_{5\,6\,7\,8}$. What we desire of this gate is that when it succeeds it tells us ``the photons were either in modes 3 and 5 or they were in modes 4 and 6, and their polarization was the same''. However it should not reveal in which pair of spatial modes they were, and with what polarization.

After two $R_{\pi/2}$s in mode 5 and 6, the intermediate state is
\begin{eqnarray}
\label{seven:eq06} &&{1 \over 4} ( | H \rangle_1 | H \rangle_3 +
|V \rangle_1 |V\rangle_3 + | H \rangle_2 | H \rangle_4 + |V
\rangle_2 |V\rangle_4 )\nonumber \\
&&~~~( | V \rangle_5 | H \rangle_7 + |H \rangle_5 |V\rangle_7 + |
V \rangle_6 | H \rangle_8 + |H \rangle_6 |V\rangle_8 ).~~~~~~~~
\end{eqnarray}
Based on the discussion in Appendix \ref{Append0201}, if the upper
$T_3$ gate is implemented (without extra photons in modes $3'$,
$5'$) and a successful detection occurs (i.e. a single horizontal and a single vertical photon are detected in two of the modes 3,
5, $3'$, and $5'$), it generates the Bell state in modes 1 and 7:
\begin{eqnarray}
\label{seven:eq07} (| H \rangle_{1} | H \rangle_{7} \pm |V
\rangle_{1} |V \rangle_{7})/\sqrt{2}.
\end{eqnarray}
Note that the parts of the input state with amplitude in modes 4,
6 are then wiped out.

On the other hand, if the lower $T_3$ gate detects one horizontal and one vertical photon, originating from modes 4 and 6, it makes a Bell state in mode 2 and 8
\begin{eqnarray}
\label{seven:eq08} (| H \rangle_{2} | H \rangle_{8} \pm |V
\rangle_{2} |V \rangle_{8})/\sqrt{2},
\end{eqnarray}
and amplitude for modes 3 and 5 is wiped out.

We essentially desire both of these $T_3$ gates to be able to succeed simultaneously and indistinguishably. In order to attain this, extra photons are injected into the spatial modes $3'$,
$4'$, $5'$, and $6'$. We will consider various possible initial states for these ancillary photons. The basic idea is that indistinguishable events occur if two photons in different polarizations are detected in both the upper and lower $T_3$
gates simultaneously. These events can arise from either the ancillary photons or the `actual inputs' - and our lack of knowledge about which possibility occurs gives an amplitude for both $T_3$ gates working. The success probability relies on the input state of the extra two photons, and we discuss several possibilities.

The first case is that two single photons are injected in mode
$3'$, $4'$, $5'$, and $6'$ in the state
\begin{eqnarray}
\label{seven:eq09} |{\rm Ex_1}\rangle &=& {1 \over 4} ( | H
\rangle_{3'} + |V \rangle_{3'} + | H \rangle_{4'} + |V
\rangle_{4'} )\,\nonumber \\&& ~~~~~( -| H \rangle_{5'} + |V
\rangle_{5'} -| H \rangle_{6'} + | V \rangle_{6'} ),~~~~~~~
\end{eqnarray}
where each photon is a superposed state in two spatial modes in both polarizations.

When we detect two different polarized photons in the upper $T_3$ gate and two different polarized photons in the lower one, we do not know whether the four photons detected in both $T_3$ gates come from hyper-entangled states or the extra input photons. For example, the upper $T_3$ gate succeeds upon detection of a horizontal photon in mode 3 and a vertical photon in mode 5. The photons could come from any two modes out of modes 3, 5, $3'$, and $5'$. According to Eq. (\ref{seven:eq09}),
the detection works on various input states like $|H \rangle_{3}
|V \rangle_{5}$, $|V \rangle_{3} |H \rangle_{5}$, $|H \rangle_{3'}
|V \rangle_{5'}$, and $|V \rangle_{3'} |H \rangle_{5'}$. If the detected photons were $|H \rangle_{3} |V \rangle_{5}$, $|V
\rangle_{3} |H \rangle_{5}$, the remaining state from Eq.
(\ref{seven:eq06}) is equal to the state in Eq.
(\ref{seven:eq07}). However, if the detected photons were $|H
\rangle_{3'} |V \rangle_{5'}$, and $|V \rangle_{3'} |H
\rangle_{5'}$, the lower circuit could be activated by $|H
\rangle_{4} |V \rangle_{6}$, $|V \rangle_{4} |H \rangle_{6}$ and the remaining state equals Eq.(\ref{seven:eq08}). The same logic can be applied the other way around between the upper and lower circuits. Thus, for the successful cases (two different polarized photons detected in each $T_3$ gate), the final state is equivalent to
\begin{eqnarray}
\label{seven:eq10} && {1 \over 2} ( | H \rangle_1 | H \rangle_7 +
| V \rangle_1 | V \rangle_7 + | H \rangle_2 | H \rangle_8 + | V
\rangle_2 | V \rangle_8 ) \,,~~~~~~~~
\end{eqnarray}
which is a superposition state of Eq. (\ref{seven:eq07}) and Eq.
(\ref{seven:eq08}). For this case, the total success probability is 1/64.

We now consider injecting a Bell pair in mode $3'$, $4'$,
$5'$, and $6'$ instead of two single photons such as
\begin{eqnarray}
\label{seven:eq11} |{\rm Ex_2}\rangle &=& {1 \over 2\sqrt{2}}
\big[ ( | H \rangle_{3'} + |V \rangle_{3'})(-| H \rangle_{5'} + |V
\rangle_{5'}) \nonumber \\&&~~~~~+ (| H \rangle_{4'} + |V
\rangle_{4'})(-| H \rangle_{6'} + | V \rangle_{6'} ) \big].
\end{eqnarray}
It can readily be seen that the same indistinguishability of $T_3$
gate operations occurs - in this case with total success probability 1/32.

Finally, the most efficient ancillary input state to use is a HES
\begin{eqnarray}
\label{seven:eq12} |{\rm Ex_3}\rangle &=& {1 \over 2} ( | H
\rangle_{3'}| V \rangle_{5'} +| V \rangle_{3'}| H \rangle_{5'}
\nonumber \\&&~~~~~+ | H \rangle_{4'}| V \rangle_{6'}+ | V
\rangle_{4'}| H \rangle_{6'}).~~
\end{eqnarray}
In this case the total success probability is 1/16.

Interestingly, even in some failure cases, we still have a chance to have remanent entanglement between two photons in mode 1 (or 2) and 7 (or 8) of Figure \ref{8th}. Without the help of extra photons in the primed modes, the success probability is 1/2 to generate a Bell pair from two HESs through this circuit. If we use two extra photons, the possibility of obtaining some entanglement between 1 (or 2) and 7 (or 8) becomes higher than 1/2. This could possibly be useful for some hybrid qubit/quadbit cluster states computing schemes. For example, we imagine a modified qubit cluster state possessing a HES at the edge and fuse two copies of this state on the HES side in circuit $K_3$. With an extra HES in the primed modes, a HES or a Bell pair is generated among mode 1,
2, 7, and 8 with overall probability 3/4.

We see therefore that we can use this Type2-like circuit to create a Bell pair from HESs. As shown in Figure \ref{8th}, we prepare two HESs with no extra photon. With probability 3/4 we achieve a Bell state. Although this seems perverse - destroying two HES's to create a Bell pair, it raises an interesting possibility of attaching systems which have the form of a HES at the end of the (qubit) cluster state. If we perform this fusion gate on two such photons, it appears that we could fuse the larger qubit cluster state with the probability 3/4.

\section{Summary of some resource tradeoffs}
\label{Sec:04}

\subsection{Difficulties of quantifying tradeoffs}
\label{Sec:0403}

A Bell pair can be created from 4 single photons with probability 1/4 (see Appendix \ref{Append03} for a proof - previously published results \cite{pan} suggested the success probability was 3/16). Such creation is \emph{ballistic} - the single photons are fired in, and (up to some local linear transformation) the desired Bell state is created 1 in 4 times. We could say that a Bell pair is ``worth'' 16 single photons on average - this indicates how much easier things will be if we have a deterministic source of Bell pairs. Now a trivial extension of this ballistic scheme can create a 3-photon GHZ state from 6 single photons with probability 1/32, and can create
(ballistically) a 4-photon GHZ state with probability 1/128 (see Table \ref{tab:MHES}). From this we might conclude a 3-photon GHZ state is worth 192 photons (6 photons per attempt, 32 attempts on average). However we can also create a 3-photon GHZ state by using a Type-I gate \cite{Browne05} and fusing two Bell pairs. The Type-I gate succeeds with probability 1/2, and each Bell pair is worth 16 photons, so this indicates the GHZ state is only worth 64 single photons. The difference, of course,
is that with the latter technique we would have to store the Bell pair, once created, in order for it to be available to combine with the second Bell pair. While the ability to postselect on successfully generated states (and then store them) lies at the heart of why it is we can turn exponentially decreasing probabilities into efficient methods for creating large entangled states, such storage is likely to present practical problems. (It is worth noting that the percolation techniques of
\cite{kieling06} ameliorate many of these issues).

Resource counting is made even messier by the following observation: Sometimes we may require the use of an ancillary entangled state within some larger ballistic circuit (a Bell pair say). One may think that we could replace this Bell pair by 4 single photons (as in Fig. \ref{NewS01} in Appendix
\ref{Append03}) to obtain a ballistic single photon scheme, and only take a hit of 1/4 in the overall success probability of the larger circuit. However the ballistic scheme presumes the ideal state is produced ``up to easily implementable linear optical transformations'' - and it is generally a smaller set of detection outcomes which yield the desired state for input into the larger circuit.

The final feature that makes resource counting difficult is the nature of failure outcomes: sometimes failed gates acting on suitably large input states still leave some of the systems in useful resource states. The potential for recycling (which also requires quantum memory) often greatly complicates the question of optimizing resource counting \cite{Gross}.
\begin{table}[b]
\centering
\begin{tabular}{|cccccc|c|c|}\hline
& & & Resource & & & Output & Probability \\ \cline{1-6}
SP & BP & 3GHZ & 4GHZ & HES &
3QdC & & \\
\hline 4 & & & & &
& BP & 1/4 \\
6 & & & & &
& 3GHZ & 1/32 \\
8 & & & & &
& 4GHZ & 1/128 \\
& 2 & & & &
& 3GHZ & 1/2 \\
& 1 & 1 & & &
& 4GHZ & 1/2 \\
\hline 8 & & & & &
& HES & 1/4096 \\
& 2 & & & &
& HES & 1/16 \\
& & & 1 & &
& HES & 3/16 \\
4 & & & & 2 & & 3QdC & 1/256 \\
6 & & & & 2 & & 4QdC & 1/1024 \\
6 & & & & 1 & 1 & 4QdC & 1/256 \\
2 & & & & & 2 & 4QdC & 1/64 \\
& 1 & & & & 2 & 4QdC & 1/32 \\
& & & & 1 & 2 & 4QdC & 1/16 \\
\hline
\end{tabular}
\caption{ Resource costs for multi-quadbit cluster states (SP =
single photon, BP = Bell pair, HES = hyper-entangled state, and QdC = quadbit cluster)} \label{tab:MHES}
\end{table}
\subsection{Resources for quadbit cluster states}
\label{Sec:0401}

As shown in Table \ref{tab:MHES}, various combinations of resources can be used to create any desired state. Without an entangled source ({i.e.} only single photon sources) one can generate a Bell pair from 4 single photons, a 3-photon GHZ state from 6 photons, and a 4-photon GHZ state from 8 photons. However,
using entangled sources, the desired many-photon state can be built with much higher probabilities.

In terms of quadbit cluster states, the counterpart of a Bell pair for qubits is a HES. So, building a HES requires a source such as 8 single photons, two Bell pairs, or one 4-photon GHZ state. Based on the circuit $J_2$ the optimal probability is 3/16, obtained when using a 4-photon GHZ state.

The bottom of the table shows various ways of building multi-quadbit cluster states by proposed methods with the help of extra photons. We only have one method to build a 3 quadbit cluster state using circuit $K_1$, while several possible methods are available to create a 4 quadbit cluster state through circuits
$K_2$ and $K_3$. For the 4 quadbit cluster state, the success probability without a 3 quadbit cluster state is 1/1024 (using circuit $K_2$).

\section{Conclusion}
\label{Conc} We have initiated the study of building higher dimensional cluster states of photons. Although we have presented several ``modules'' within our constructions that we expect to be of generic use for LOQC using higher dimensional photonic states,
it is unclear to us whether the procedures we have outlined are close to the best possible. If they are, then there seems to be limited advantage in using higher dimensional cluster states built up from single photons from a strict resource counting perspective. It is possible, however, that in the future deterministic sources of hyper-entanglement become available.

Very recently, qubit one-way quantum computation using a hyper-entangled state (HES) has been demonstrated
\cite{new01,new02}. In these papers, a four-qubit cluster state is created from a HES generated by spontaneous parametric down conversion. The authors treat a photon's polarization and its spatial mode as one qubit each, so that destroying a single photon performs two single-qubit measurements simultaneously. Note that this is quite different from our proposal, where we use a single photon as a higher-dimensional quantum unit. An equivalence between these schemes arises for a 2-quadbit HES and a 4-qubit linear cluster state because
$2+2=2\cdot2$ \cite{new01,new02}.

\acknowledgements

We acknowledge useful discussions with Jens Eisert, David Gross and Konrad Kieling and thank the support of the US Army Research Office (W911NF-05-0397) and the UK EPSRC. This work was supported in part by the UK Engineering and Physical Sciences Research Council through their Quantum Information Processing Interdisciplinary Research Centre, and by the European Union through their networks SCALA and CONQUEST. J.J. was also supported by the Overseas Research Student Award Program.
\title{Foliated manifolds, algebraic $K$-theory, and a secondary invariant}

\begin{abstract}
We introduce a $\C/\Z$-valued invariant for foliated manifolds with partially flat vector bundles.
Our main result is a formula for the invariant in terms of algebraic $K$-theory and a regulator.
\end{abstract}

\section{Introduction}

In this paper we introduce and analyse an invariant
$$\rho(M,\cF,\nabla^{I},s)\in \C/\Z$$
of an odd-dimensional closed spin manifold $M$ equipped with a real foliation $\cF$, a complex vector bundle with flat partial connection $\nabla^{I}$ in the direction of the foliation, and a stable framing $s$ of the foliation.
In order to define this number we must choose in addition a Riemannian metric on $M$, an extension of the partial connection $\nabla^{I}$ to a connection, and similarly, an extension of the canonical flat partial connection on the normal bundle $\cF^{\perp}$ of the foliation.

Without any further conditions the number $\rho(M,\cF,\nabla^{I},s)$ may depend non-trivially on the additional geometric choices. But if the codimension of the foliation $\cF$ is sufficiently small, namely if \begin{equation}\label{fefwfwefewfewfewewf}
2\mathrm{codim}(\cF)< \dim(M)\ ,
\end{equation} then
$\rho(M,\cF,\nabla^{I},s)$ does not depend on the additional choices.

\bigskip

The quickest way to define the invariant in Definition \ref{flwefjwefewff} is to use the integration in differential complex $K$-theory $\widehat{KU}^{*}$. Alternatively, $\rho(M,\cF,\nabla^{I},s)$ can also be expressed as a combination of $\eta$-invariants of twisted Dirac operators and correction terms involving integrals of characteristic forms and their transgressions, see Proposition \ref{fjwfkljfklfjwelkfjkewjfewlkf9798237982749237432243}.

\bigskip

The invariant $\rho(M,\cF,\nabla^{I},s)$ is very interesting since it combines various classical secondary invariants in spectral geometry, topology and foliation theory in one object.
We will reveal these relations by analysing special cases in Section \ref{jbjkhwekfewfwefewf89798}. We will observe that
$\rho(M,\cF,\nabla^{I},s)$ subsumes Adams' $e$-invariant for framed manifolds, the rho-invariant for Dirac operators twisted with flat bundles,
and classical invariants from foliation theory like the Godbillon-Vey invariant.

\bigskip

While the construction of the invariant $\rho(M,\cF,\nabla^{I},s)$ and the verification of its basic properties are not very deep and based on well-known methods from differential geometry and local index theory we think that its relation with algebraic $K$-theory is much less obvious.
In the present paper we reveal this relation in the special case of a foliated manifold of the form $$(M,\cF)=(P\times X,T_{\C}P\boxplus \{0\})\ .$$
Here $P$ is closed and stably framed. A complex vector bundle
$(V,\nabla^{I})$ with flat partial connection on $M$ provides an algebraic $K$-theory class of the ring of complex-valued smooth functions $C^{\infty}(X)$. We will write this class as $$f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})\in K_{p}(C^{\infty}(X))$$ with notation to be introduced in Section \ref{keklwfewfewfewf}, where
$p:=\dim(P)$. If $p >\dim(X)$ (this is exactly condition \eqref{fefwfwefewfewfewewf}), then we can define a regulator transformation
$${\tt reg}_{X}:K_{p}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-p-1}(X)\ ,$$ see Definition \ref{klfwefewfewfwf}.
If we now assume that $X$ is a closed spin manifold such that $p+\dim(X)$ is odd, then we have an integration
$$\pi_{!}^{o}:{\mathbf{ku}}\C/\Z^{-p-1}(X)\to {\mathbf{ku}}\C/\Z^{-p-\dim(X)-1}(*)\cong \C/\Z\ ,$$ where
$\pi:X\to *$ and $o$ is the orientation of $\pi$ for ${\mathbf{ku}}\C/\Z$ induced by the spin structure.
Our main result is Theorem \ref{flkfefwefwefewfef}:
\begin{theorem} \label{edhjlqkdjqwdqwdwqdw}
$$\rho(M,\cF,\nabla^{I},s)=\pi_{!}^{o}({\tt reg}_{X}(f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})))\ .$$
\end{theorem}
The proof of this theorem will be finished in Section \ref{keklwfewfewfewf}. It is based on the diagram \eqref{dewewdedewdewd2342343} which comprises various Riemann-Roch type squares for integration in algebraic and topological $K$-theory and their differential refinements.

\bigskip

In Section \ref{dkqwldqwdwqdwqdwqdwqd} we put the regulator ${\tt reg}_{X}$ into its natural general framework. We introduce the algebraic $K$-theory spectrum ${\mathbf{K}}(M,\cF)$ of a foliated manifold and the Hodge-filtered connective complex $K$-theory spectrum ${\mathbf{ku}}^{\flat}(M,\cF)$. The regulator ${\tt reg}_{X}$ used in the theorem above is then a special case of a regulator
$${\tt reg}:{\mathbf{K}}(M,\cF) \to {\mathbf{ku}}^{\flat}(M,\cF) \ .$$
In order to justify calling this map a regulator, consider a complex manifold $M$ as a real manifold with a complex foliation $\cF:=T^{0,1}M$. In this case ${\mathbf{K}}(M,T^{0,1}M)$ is the algebraic $K$-theory spectrum of $M$ defined using holomorphic vector bundles. Furthermore, the homotopy groups of
${\mathbf{ku}}^{\flat}(M,T^{0,1}M) $
are the ${\mathbf{ku}}$-theory analogs of the integral Deligne cohomology groups.
The regulator is an integral refinement of a version of Beilinson's regulator.
We will explain all this in detail in Section \ref{dkqwldqwdwqdwqdwqdwqd}.

\bigskip

In the first Sections we introduce basic definitions from the theory of foliated manifolds and characteristic classes.
The experienced reader could skip these sections in a first reading and use them as a reference for notation and normalization conventions. In Section \ref{jfkelwfwefewfewfewfewfwef} we give a quick introduction to the features of differential complex $K$-theory which are used in the construction of the invariant $\rho(M,\cF,\nabla^{I},s)$. The actual construction of this invariant will be given in Section \ref{flkwjefwkejfeflkewjflkefjelkfjelfjewlfewf9790}. As mentioned above, in the two subsequent Sections \ref{fkweflwefewfewfewfwfwfw} and \ref{jbjkhwekfewfwefewf89798} we provide a spectral theoretic interpretation of the invariant and relate it to various classical secondary invariants.

In Section \ref{keklwfewfewfewf} we develop the theory which is necessary to state and prove Theorem \ref{edhjlqkdjqwdqwdwqdw}. Finally, Section \ref{dkqwldqwdwqdwqdwqdwqd} is devoted to the algebraic $K$-theory of foliated manifolds and the regulator in general. This section has a substantial overlap with the work of Karoubi \cite{karoubi45}, \cite{karoubiast}, \cite{karoubi43}. In a certain sense it reformulates his constructions using the new technology of the $\infty$-categorical approach to $K$-theory and regulators developed in \cite{Bunke:2013ab}, \cite{Bunke:2012fk}, \cite{Bunke:2013aa}, \cite{Bunke:2014aa}.

\bigskip

{\em Acknowledgement: This work was partially supported by the SFB 1085 ``Higher Invariants'' of the DFG.}

\section{Foliated manifolds}\label{fewl453534535435}

We introduce the category of foliated manifolds $\Mf_{\C-fol}$ and its full subcategory $\Mf_{fol}$ of manifolds with real foliations.

\bigskip

Let $M$ be a smooth manifold and $T_{\C}M:=TM\otimes_{\R}\C$ be the complexified tangent bundle. A section of $ T_{\C}M$ is called a complex vector field. A complex vector field $X\in \Gamma(M,T_{\C}M)$ acts as a derivation $(X,f)\mapsto X(f)$ on the algebra $C^{\infty}(M)$ of complex-valued smooth functions $f$. For a pair of complex vector fields $X,Y $ we can consider the commutator $[X,Y]\in \Gamma(M,T_{\C}M)$. It is the unique complex vector field such that $$[X,Y](f)=X(Y(f))-Y(X(f))$$
for all $f\in C^{\infty}(M)$.

\bigskip

If $\cF\subseteq T_{\C}M$ is a subbundle, then we have an inclusion $\Gamma(M,\cF)\subseteq \Gamma(M,T_{\C}M)$ of spaces of sections.
\begin{ddd} A subbundle $\cF \subseteq T_{\C}M$ is called integrable if for any two sections $X,Y\in \Gamma(M,\cF )$ we also have $[X,Y]\in \Gamma(M,\cF)$.
\end{ddd}

\begin{ddd}
A foliation of a smooth manifold $M$ is an integrable subbundle $\cF \subset T_{\C}M$. A foliated manifold is a pair $(M,\cF )$ of a manifold and a foliation. \end{ddd}

Since $T_{\C}M$ is the complexification of the real vector bundle $TM$ we have a complex antilinear involution $X\mapsto \bar X$. For a subbundle $\cF\subset T_{\C}M$ we let $\bar \cF\subseteq T_{\C}M$ denote the subbundle obtained by applying this automorphism to the elements of $\cF$.
\begin{ddd}
A foliation is called real if $\overline{\cF}=\cF$.
In this case we define the real integrable subbundle $\cF_{\R}:=\cF\cap TM\subseteq TM$.
\end{ddd}

\bigskip

Let $f:M\to N$ be a smooth map between manifolds. Its differential is a map of bundles $$df :T_{\C}M\to f^{*}T_{\C}N$$ over $M$.

\begin{ddd}
We say that $f:(M,\cF )\to (M^{\prime},\cF^{\prime} )$ is a foliated map if its differential preserves the foliations in the sense that
$df (\cF )\subseteq f^{*}\cF^{\prime} $.
\end{ddd}

The composition of two foliated maps is again a foliated map.

\begin{ddd}
We let $\Mf_{\C-fol}$ denote the category of foliated manifolds and foliated maps.
We further let $\Mf_{fol}\subset \Mf_{\C-fol}$ be the full subcategory of foliated manifolds with real foliations.
\end{ddd}

Let $\Mf$ denote the category of smooth manifolds. Then we have functors
$$\Mf_{fol}\to \Mf_{\C-fol}\to \Mf\ ,$$
where the first is the inclusion of a full subcategory, and the second forgets the foliation.
The category of foliated manifolds has a cartesian product. It is given by
$$(M,\cF)\times (M^{\prime},\cF^{\prime})\cong (M\times M^{\prime},\cF\boxplus \cF^{\prime})\ .$$

\begin{ex}{\rm If $M$ is a complex manifold, then the subbundle $\cF :=T^{0,1}M\subseteq T_{\C}M$ is a complex foliation. Vice versa, a complex foliation $\cF $ with the additional property that $\cF \oplus \overline{\cF }\cong T_\C M$ equips $M$ with a complex structure such that $T^{0,1}M=\cF$.
Moreover, a foliated map between such foliated manifolds is the same as a holomorphic map.
}
\end{ex}

\begin{ex}\label{lkjwqdwdqwdqwdqwd}{\rm Every manifold $M$ has a minimal foliation $\cF_{min}:=\{0\}$ and a maximal foliation $\cF_{max} :=T_{\C}M$.
These foliations are real. If $M$ is equipped with the minimal foliation and $(M^{\prime},\cF^{\prime})$ is a foliated manifold, then every smooth map $M\to M^{\prime}$ is foliated.
Similarly, if $M^{\prime}$ is equipped with the maximal foliation, then for a foliated manifold $(M,\cF)$ every smooth map $M\to M^{\prime}$ is foliated.
}
\end{ex}

\begin{ex}\label{djqlwdqwdqwdq}{\rm Let $\pi:W\to B$ be a submersion. Then the complexification of the vertical bundle $T^{v}\pi:=\ker(d\pi)\subseteq TW$
defines a real foliation $\cF^{v}$ called the vertical foliation. The map $\pi$ is foliated for any choice of a foliation on $B$.
}
\end{ex}

\begin{ex}{\rm Let $\Gamma$ be a discrete group which acts freely and properly on a manifold $\tilde B$ from the right with quotient $B:=\tilde B/\Gamma$. Furthermore, let $X$ be a manifold with a left action of $\Gamma$. Then we consider the manifold
$M:=\tilde B\times_{\Gamma} X$. The vertical foliations $\tilde \cF^{v} $ and $\tilde \cF^{H} $ associated to the projections $\tilde B\times X\to \tilde B$ and $\tilde B\times X\to X$ descend to the quotient and define the vertical and horizontal foliations $\cF^{v} $ and $\cF^{H} $ on $M$. Note that $ \cF^{v} $ is the vertical foliation of the submersion $ M\to B$. We have $\cF^{H}\oplus \cF^{v}\cong T_{\C} M$. }
\end{ex}

\begin{ex}\label{dekldlqwdqwdqwd}{\rm

Let $(B,\cF)$ be a foliated manifold. We call a map $f:W\to B$ transversal to $\cF$ if for every $w\in W$ we have the relation $\cF_{f(w)}+df(TW_{w})=T_{f(w)}B$. If $f$ is transversal to $\cF$, then we can define a maximal foliation $f^{-1}\cF$ on $W$ such that $f$ becomes a foliated map. We must set $f^{-1}\cF:=df^{-1}(f^{*}\cF)$.

\bigskip

In particular, if $P$ is a manifold, then we can consider the projection $\pi:P\times B\to B$.
In this case, $ \pi^{-1}\cF=T_{\C}P\boxplus \cF$.

}
\end{ex}

\section{Filtrations on the de Rham complex}

A foliation on a manifold induces a decreasing multiplicative filtration of the de Rham complex.

\bigskip

We consider a foliated manifold $(M,\cF)$.
By $(\Omega(M),d)$ we denote the complexified de Rham complex of $M$.

\begin{ddd}\label{ilfjewlfwfewfewfewfwfw}
For $n,p\in \nat$ we define the subspace $$F^{p}\Omega^{n}(M)\subseteq \Omega^{n}(M)$$ of forms which vanish after the insertion of $n-p+1$ sections of $\cF$.
\end{ddd}

The family of these subspaces for all $p$ forms a decreasing filtration of $\Omega^{n}(M)$. More precisely we have the following chain of inclusions
$$ \Omega^{n}(M)=F^{0}\Omega^{n}(M) \supseteq F^{1}\Omega^{n}(M)\supseteq \dots \supseteq F^{\mathrm{codim}(\cF)}\Omega^{n}(M)\supseteq F^{\mathrm{codim}(\cF)+1}\Omega^{n}(M)=0\ .$$
Combining these filtrations for all $n$ together we get a decreasing filtration $(F^{p}\Omega(M))_{p\in \nat}$ of the graded commutative algebra $\Omega(M)$ which is multiplicative, i.e. the wedge product restricts to maps \begin{equation}\label{hjkqhdkdhwqkdwqd98789}
\wedge :F^{p}\Omega^{m}(M)\otimes F^{q}\Omega^{n}(M)\to F^{p+q}\Omega^{m+n}(M)\ .\end{equation}

These properties hold in fact true for arbitrary subbundles $\cF$ of $T_{\C}M$. But as a consequence of integrability of $\cF$ this filtration is also preserved by the de Rham differential, i.e.
$(F^{p}\Omega(M),d)$ is a subcomplex of $(\Omega(M),d)$ for every $p\in \nat$.

\bigskip

\begin{ddd}
For a foliated manifold $(M,\cF)$ we write $\Omega(M,\cF)$ for the de Rham complex $\Omega(M)$ considered as a filtered commutative differential graded algebra.
\end{ddd}

\begin{ex}{\rm If $M$ is a complex manifold, then the filtration on $\Omega(M,T^{0,1}M)$
is called the Hodge filtration. The associated spectral sequence is called the Hodge-de Rham spectral sequence.
}\end{ex}

\begin{ex}{\rm If $W\to B$ is a submersion and $\cF^{v}$ is the vertical foliation (Example \ref{djqlwdqwdqwdq}) on $W$, then the spectral sequence associated to the filtration of the de Rham complex $\Omega(W,\cF^{v})$ is the Leray-Serre spectral sequence.}\end{ex}

If $f:(M,\cF)\to (M^{\prime},\cF^{\prime})$ is a foliated map, then $f^{*}:\Omega(M^{\prime},\cF^{\prime})\to \Omega(M,\cF)$ is a morphism of filtered commutative differential graded algebras.

\bigskip

We let $\mathbf{CDGA}$ and $\mathbf{CDGA}^{filt}$ denote the categories of graded commutative differential graded algebras and filtered graded commutative differential graded algebras. For categories $\bC,\bD$ we can consider the category $${\mathbf{PSh}}_{\bD}(\bC):=\Fun(\bC^{op},\bD)$$ of $\bD$-valued presheaves on $\bC$.
We can formalize the properties of the filtered de Rham complex discussed above by saying that we have a presheaf
$$\Omega\in {\mathbf{PSh}}_{ \mathbf{CDGA}^{filt}}(\Mf_{\C-fol})\ .$$

\begin{ddd}\label{jkdjlqwdqwdqwd} We define the presheaves of graded commutative differential graded algebras $$DD^{-}\in {\mathbf{PSh}}_{\mathbf{CDGA}}(\Mf_{\C-fol})\ , \quad DD^{per}\in {\mathbf{PSh}}_{\mathbf{CDGA}}(\Mf)$$ by
$$DD^{-}(M,\cF) :=\prod_{p\in \Z} F^{p}\Omega (M)[2p]\ , \quad DD^{per}(M):=\prod_{p\in \Z} \Omega(M)[2p]\ .$$
\end{ddd}
We call $DD^{per}$ the periodic and $DD^{-}$ the negative de Rham complex.
Note that $DD^{-}$ has a decomposition into a product of components $$DD^{-}(M,\cF)\cong \prod_{p\in \Z} DD^{-}(M,\cF)(p)\ , \quad DD^{-}(M,\cF)(p) :=F^{p}\Omega (M)[2p]\ ,$$
and the product on $DD^{-}$ is induced by the wedge products of forms \eqref{hjkqhdkdhwqkdwqd98789} componentwise as $$DD^{-}(M,\cF)(p)\otimes DD^{-}(M,\cF)(q)\to DD^{-}(M,\cF)(p+q)\ .$$
The description of the product for $DD^{per}$ is similar.

\begin{rem}{\rm The cohomology of $DD^{per}(M)$ is the two-periodic de Rham cohomology of $M$.
It is the natural target of the Chern character from topological $K$-theory, see Definition \ref{ffwefwefewfewfwfw}.
The complex $DD^{-}(M,\cF)$ will receive characteristic forms for vector bundles with connections which are flat in the direction of the foliation, see Definition \ref{qldjqwldqwdqwdqwd}.
}\end{rem}

\section{Vector bundles with flat partial connections}\label{fhfjlwefkjfewfewfewfwf}

We introduce the notion of a vector bundle with a flat partial connection on a foliated manifold.

\bigskip

We consider a foliated manifold $(M,\cF)$.
Let $V\to M$ be a complex vector bundle.

\begin{ddd}\label{lwfwfewfewfewf}
A partial connection on $V$ is a map
$$\nabla:\Gamma(M,V)\to \Gamma(M,\cF^{*}\otimes V)$$ which satisfies the Leibniz rule.
\end{ddd}

\begin{rem}{\rm For sections $X\in \Gamma(M,\cF)$ and $\phi\in \Gamma(M,V)$ we write as usual $$\nabla_{X}\phi:=i_{X}(\nabla(\phi))
\in \Gamma(M,V)$$ for the evaluation of $\nabla\phi$ at $X$. With this notation the Leibniz rule has the form
$$\nabla_{X}(f\phi)=X(f)\phi+f\nabla_{X}\phi\ , \quad \forall f\in C^{\infty}(M)\ , \quad \forall\phi\in \Gamma(M,V)\ , \quad \forall X\in \Gamma(M,\cF)\ .$$
}\end{rem}

The foliation gives rise to a graded commutative differential graded algebra whose underlying commutative graded algebra is given by $\Omega(\cF):=\Gamma(M,\Lambda^{*}\cF^{*})$. Its differential $d^{\cF}$ is fixed by the prescription \begin{equation}\label{jdgjdhjdhk1j2hdkjhd2d2ud2du1d12d}
d^{\cF}:\Omega^{0}(\cF)\to \Omega^{1}(\cF)\ , \quad d^{\cF}(\phi):=d\phi_{|\cF}\ ,
\end{equation}
where $d$ is the usual de Rham differential and we use the identification $\Omega^{0}(\cF)= C^{\infty}(M)=\Omega^{0}(M)$.
We further write $\Omega(\cF,V):=\Gamma(M,\Lambda^{*}\cF^{*}\otimes V)$.
As in the case of usual connections we can extend $\nabla$ uniquely to a derivation on the $\Omega(\cF)$-module $\Omega(\cF,V)$.
Its curvature defined by $$R^{\nabla}:=\nabla^{2}\in \End( \Omega(\cF,V))$$ is $\Omega(\cF) $-linear and hence a two-form on $\cF$ with values in $\End(V)$, i.e. we have
$R^{\nabla}\in \Omega^{2}(\cF, \End(V))$.
\begin{ddd}\label{ijflkewfewfoieoiuoiuoiuoiuwef}
A partial connection $\nabla$ on $V$ is called flat if $R^{\nabla}=0$.
\end{ddd}

We now consider a foliated map $f:(M^{\prime},\cF^{\prime})\to (M,\cF)$.
If $V\to M$ is a vector bundle with a partial connection $\nabla$, then $f^{*}V$
has an induced partial connection $f^{*}\nabla$.
It is characterized by
\begin{equation}\label{gf433g34g3353454353453tr}
(f^{*}\nabla)_{X}(f^{*}\phi)=f^{*}\nabla_{df(X)}\phi\ , \quad \forall m^{\prime}\in M^{\prime}\ , \quad \forall X\in \cF^{\prime}_{m^{\prime}}\ , \quad \forall \phi\in \Gamma(M,V)\ .
\end{equation}
This formula has to be understood as an equality between elements in the fibre $(f^{*}V)_{m^{\prime}}$.
Because of the relation
$$f^{*}R^{\nabla}=R^{f^{*}\nabla}$$
the pull-back of a flat partial connection is again flat.

\begin{ex}{\rm If $M$ is a complex manifold with foliation $\cF=T^{0,1}M$, then a flat partial connection on a complex vector bundle $V$ is the same as a holomorphic structure. In this situation the flat partial connection is usually denoted by $\bar \partial$.
}
\end{ex}

\begin{ex}{\rm If $M$ is equipped with the minimal foliation, then a partial connection on a vector bundle is no additional data.
In the opposite case, where $M$ has the maximal foliation, a flat partial connection is the same as a flat connection.}
\end{ex}
\begin{ex}\label{fklwefjwefewf}{\rm Let $$\cF^{\perp}:=T_{\C}M/\cF$$ be the normal bundle of a foliation.
Then $\cF^{\perp}$ has a natural flat partial connection $\nabla^{\cF^{\perp}}$. It is given by
$$\nabla_{X}[Y]:= [\ [X,Y]\ ]\ ,$$ where $ X\in \Gamma(M,\cF)$ and the vector field $Y\in \Gamma(M,T_{\C}M)$ represents the section $[\ Y\ ] \in \Gamma(M,\cF^{\perp})$ of the normal bundle.}
\end{ex}

\begin{ex}\label{jfwlefjlwefewfewfewfw}{\rm Let $\pi:W\to B$ be a submersion and consider the vertical foliation $T^{v}\pi$ on $W$, see Example \ref{djqlwdqwdqwdq}.
If $V\to B$ is any vector bundle, then $\pi^{*}V\to W$ has a canonical flat partial connection $\nabla^{I}=\pi^{*}\nabla$, where $\nabla$ is the canonical flat partial connection on $V$ in the direction of the trivial foliation. In view of \eqref{gf433g34g3353454353453tr}
it is characterized by the condition that for $\phi\in \Gamma(B,V)$ we have $\nabla^{I}\pi^{*}\phi=0$.
}
\end{ex}

\section{Connections and characteristic forms}

We introduce the Chern character forms and Chern forms of complex vector bundles with connection. In the foliated case we discuss the consequences of the fact that the connection extends a flat partial connection.

\bigskip

Let $(M,\cF)$ be a foliated manifold and $(V,\nabla^{I})$ be a complex vector bundle with a flat partial connection.

\begin{ddd} \label{fjwelfwfewf42343242wf}
A connection $\nabla$ on $V$ is an extension of $\nabla^{I}$, if the relation
$\nabla_{X}\phi=\nabla^{I}_{X}\phi$ holds for all $\phi\in \Gamma(M,V)$ and $X\in \Gamma(M,\cF)$.
\end{ddd}

One can show that a flat partial connection admits extensions. Furthermore, the set of extensions of a flat partial connection is a torsor over the complex vector space
$$\Gamma(M, \cF^{\perp,*}\otimes \End(V))\ .$$

\begin{ex}{\rm A connection on $\cF^{\perp}$ which extends the partial flat connection $\nabla^{I,\cF^{\perp}}$ of Example \ref{fklwefjwefewf} is called a Bott connection.}
\end{ex}

\begin{ex}\label{fjewflewjflewfewf}{\rm Let $f:W\to B$ be a submersion and $\cF^{v}$ be the vertical foliation (Example \ref{djqlwdqwdqwdq}). If $V\to B$ is a complex vector bundle, then
$f^{*}V\to W$ has a canonical flat partial connection $\nabla^{I}$, see Example \ref{jfwlefjlwefewfewfewfw}. If $\nabla$ is any connection on $V$, then $f^{*}\nabla$ extends $\nabla^{I}$.

More generally, if $f:W\to B$ is transverse to a foliation $\cF$ on $B$ and $(V,\nabla^{I})$ is a vector bundle with flat partial connection on $B$, then $(f^{*}V,f^{*}\nabla^{I})$ is a vector bundle with flat partial connection on $W$, where $W$ is equipped with the foliation $f^{-1}\cF$, see Example \ref{dekldlqwdqwdqwd}. If $\nabla$ is a connection on $V$ extending $\nabla^{I}$, then $f^{*}\nabla $ is a connection on $f^{*}V$ extending $f^{*}\nabla^{I}$.

}
\end{ex}

If $\nabla$ is a connection on a complex vector bundle, then we consider its curvature
$$R^{\nabla}:=\nabla^{2}\in \Omega^{2}(M,\End(V))\ .$$ The Chern character form of $\nabla$ is the closed inhomogeneous complex-valued form
$$\ch_{0}(\nabla)+\ch_{2}(\nabla)+\ch_{4}(\nabla)+\dots:=\Tr \exp(-\frac{R^{\nabla}}{2\pi i})$$
with homogeneous components $\ch_{2i}(\nabla)\in \Omega^{2i}_{cl}(M)$. We will consider the Chern character form as a zero cycle in the periodic complex $DD^{per}(M)$.

\begin{ddd}\label{ffwefwefewfewfwfw}
We define
$$\ch(\nabla):=(\ch_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{per}(M))\ .$$
\end{ddd}

\begin{rem}\label{dkjqwlkdjqwlkjdlwqkdwdwqdqwd}{\rm In this remark we explain how the Chern character form behaves under complex conjugation and inserting adjoint connections. First of all,
if we choose a hermitean metric $h$ on the bundle $V$ with connection $\nabla$, then we can form the adjoint connection $\nabla^{*}$ which is characterized by the relation in $\Omega^{1}(M)$:
$$dh(\phi,\psi)=h(\nabla\phi,\psi)+h(\phi,\nabla^{*}\psi)\ , \quad \mbox{for all}\:\phi,\psi\in \Gamma(M,V)\ .$$ Applying $d$ to this equality again we get
$$0=h(R^{\nabla}\phi,\psi)+h(\phi, R^{\nabla^{*}}\psi)\ .$$ In view of the $2\pi i$-factor in the definition of the Chern character form this equality implies the relation \begin{equation}\label{ccewcwecweewwerewr}
\overline{\ch(\nabla)}=\ch(\nabla^{*})\ .
\end{equation}
The connection $\nabla$ is called unitary if $\nabla^{*}=\nabla$. In this case, the Chern character form $\ch(\nabla)$ is real.}
\end{rem}

Let $(V,\nabla^{I})$ be a complex vector bundle on $(M,\cF)$ with a flat partial connection $\nabla^{I}$. We further choose a hermitean metric $h$ on $V$.

\begin{ddd}\label{kdlqwdqwdqwdqd}
A flat partial connection $\nabla^{I}$ is called unitary (with respect to $h$), if $$ d^{\cF}h(\phi,\psi)=h(\nabla^{I}\phi,\psi)+h(\phi,\nabla^{I}\psi)$$ for all $\phi,\psi\in \Gamma(M,V)$.
\end{ddd}
See \eqref{jdgjdhjdhk1j2hdkjhd2d2ud2du1d12d} for $d^{\cF}$.

\begin{lem}\label{dhqwkdqkwddqwdwqdioipopioipoopi}
If $\nabla^{I}$ is unitary (with respect to $h$), then it admits a unitary extension $\nabla$.
\end{lem}
\proof Let $\nabla_{0}$ be some extension of $\nabla^{I}$. Then $\nabla^{*}_{0}$ is a second extension of $\nabla^{I}$ and
$$\nabla:=\frac{1}{2}(\nabla_{0}+\nabla_{0}^{*})$$
is a unitary extension of $\nabla^{I}$. \hB

\bigskip

Note that the filtration of $\Omega(M,\cF)$ introduced in Definition \ref{ilfjewlfwfewfewfewfwfw} induces a filtration on $\Omega(M,\End(V))$ which is compatible with the $\Omega(M,\cF)$-module structure.

\begin{lem}If $\nabla$ extends a flat partial connection $\nabla^{I}$ on $V$, then
$R^{\nabla}\in F^{1}\Omega^{2}(M,\End(V))$.
\end{lem}
\proof We have
$$R^{\nabla}_{|\Lambda^{2}\cF }=R^{\nabla^{I}}=0\ .$$ \hB This Lemma has consequences for the Chern character forms.

\begin{kor}\label{fwefwefewfewfewfewf2434ewfewfwefewf}
If $\nabla$ extends a flat partial connection on $V$, then
$$\ch_{2i}(\nabla)\in F^{i}\Omega^{2i}_{cl}(M,\cF)\ .$$
\end{kor}

\begin{ddd}\label{qldjqwldqwdqwdqwd}
If $\nabla$ extends a flat partial connection on $V$,
then we define
$$\ch^{-}(\nabla):=(\ch_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{-}(M,\cF))\ .$$
\end{ddd}

We let ${\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$ and ${\mathbf{Vect}}^{\nabla}(M)$ denote the symmetric monoidal categories (with respect to the direct sum) of pairs $(V,\nabla)$ of complex vector bundles with connection, where in the first case $\nabla$ extends a flat partial connection. In both cases morphisms are connection preserving vector bundle morphisms.

If $f:M^{\prime} \to M $ is a smooth map and $(V,\nabla)\in {\mathbf{Vect}}^{\nabla}(M)$, then we can define $(f^{*}V,f^{*}\nabla)\in {\mathbf{Vect}}^{\nabla}(M^{\prime})$ and have the relation \begin{equation}\label{wqdqwdqwdwqdwqwqdqd}
f^{*}\ch(\nabla)=\ch(f^{*}\nabla)\ .\end{equation}
Similarly, if $f:(M^{\prime},\cF^{\prime})\to (M,\cF)$ is a foliated map and $(V,\nabla)\in {\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$, then
$(f^{*}V,f^{*}\nabla)\in {\mathbf{Vect}}^{\flat,\nabla}(M^{\prime},\cF^{\prime})$ and we have the relation
\begin{equation}\label{wqdqwdqwdwqdwqwqdqd1}
\ch^{-}(f^{*}\nabla)=f^{*}\ch^{-}(\nabla)\ .\end{equation}
Thus the Chern character forms are characteristic forms. In addition, they are additive, i.e. the Chern character form of a direct sum is the sum of the Chern character forms of the summands.
These properties will be important for the construction of the regulator in Subsection \ref{kfjwelfewfewfewfewfewfewfe}.

\bigskip

Let $(V,\nabla)$ be a complex vector bundle with connection on a manifold $M$. Then we define the Chern forms $$c_{i}(\nabla)\in \Omega^{2i}(M)$$ of $\nabla$ as the homogeneous components of the following inhomogeneous form
$$1-c_{1}(\nabla)+c_{2}(\nabla)-\dots=\det(1-\frac{R^{\nabla}}{2\pi i})\ .$$
The Chern forms can be expressed as homogeneous polynomials in the Chern character forms. In particular, if $(M,\cF)$ is a foliated manifold and $\nabla$ extends a flat partial connection, then we have \begin{equation}\label{jlkjlwejflkewjfewf89798}
c_{i}(\nabla)\in F^{i}\Omega^{2i}_{cl}(M,\cF)\ .\end{equation}

\section{Characteristic forms of real foliations}

We introduce characteristic forms of real vector bundles on real foliated manifolds. We in particular discuss the $\hA$-form.

\bigskip

We consider a foliated manifold $(M,\cF)$. If $V\to M$ is a real vector bundle on a real foliated manifold $(M,\cF)$, then in analogy with Definitions \ref{lwfwfewfewfewf} and \ref{ijflkewfewfoieoiuoiuoiuoiuwef} we have the notion of a flat partial connection $$\nabla^{I}:\Gamma(M,V)\to \Gamma(M,\cF_{\R}^{*}\otimes V)$$ on $V$.
We furthermore have the notion of a connection $\nabla$ on $V$ extending $\nabla^{I}$ (compare with Definition \ref{fjwelfwfewf42343242wf}).

\bigskip

Let $V\to M$ be a real vector bundle with connection $\nabla$. We let $\nabla_{\C}$ denote the induced connection on the complexification $V\otimes \C$.
We define the Pontrjagin forms of $\nabla$ by $$p_{i}(\nabla):=(-1)^{i}c_{2i}(\nabla_{\C})\in \Omega_{cl}^{4i}(M)\ .$$
If $(M,\cF)$ is real foliated and $\nabla$ extends a flat partial connection, then by \eqref{jlkjlwejflkewjfewf89798} we have \begin{equation}\label{wwejoijiofwef9789}
p_{i}(\nabla)\in F^{2i}\Omega^{4i}_{cl}(M,\cF)\ .\end{equation}

\bigskip

In order to define the $\hA$-form we consider the symmetric polynomials $p_{i}$ of degree $4i$ in variables $x_{\ell}$ of degree $2$ defined by the relation
$$1+p_{1} +p_{2} +\dots=\prod_{\ell} (1+x_{\ell}^{2})\ .$$
We define homogeneous polynomials $\hA_{4k} $ for $k\ge 1$ in the variables $p_{i}$ by the relation
$$ 1+\hA_{4} +\hA_{8} +\dots=\prod_{\ell}\frac{\frac{x_{\ell}}{2}}{\sinh(\frac{x_{\ell}}{2})}\ .$$
Then the components of the $\hA$-form are defined by
$$\hA_{4i}(\nabla):=\hA_{4i}(p_{1}(\nabla),p_{2}(\nabla),\dots) \in \Omega_{cl}^{4i}(M)\ .$$
If $(M,\cF)$ is real foliated and the connection $\nabla$ extends a flat partial connection, then by \eqref{wwejoijiofwef9789} we have
$$\hA_{4i}(\nabla)\in F^{2i}\Omega^{4i}_{cl}(M,\cF)\ .$$

\bigskip

We again want to consider the $\hA$-form as a zero cycle of $DD^{per}(M)$, or of $DD^{-}(M,\cF)$ in the foliated case. In order to simplify the notation we set $\hA_{2i}(\nabla):=0$ if $i$ is odd.

\begin{ddd} If $\nabla$ is a connection on a real vector bundle on $M$, then we define
$$\hA(\nabla):=(\hA_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{per}(M))\ .$$
If $(M,\cF)$ is a real foliated manifold and $\nabla$ extends a flat partial connection, then we define
$$\hA^{-}(\nabla):=(\hA_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{-}(M,\cF))\ .$$
\end{ddd}

The $\hA$-form is multiplicative, i.e. the $\hA$-form of a direct sum of connections is the product of $\hA$-forms. Furthermore, the $\hA$-form of a trivial connection is the multiplicative unit.

\begin{ex}\label{kjlwefwefwef}{\rm If $(M,\cF)$ is a real foliated manifold, then the real normal bundle $$\cF_{\R}^{\perp}:=TM/\cF_{\R}$$ of the foliation has a flat partial connection $\nabla^{I,\cF_{\R}^{\perp}}$ similarly as in Example \ref{fklwefjwefewf}. If we choose a connection $\nabla^{\cF_{\R}^{\perp}}$ extending $\nabla^{I,\cF_{\R}^{\perp}}$, then we obtain a cycle
$$\hA^{-}(\nabla^{\cF_{\R}^{\perp}})\in Z^{0}(DD^{-}(M,\cF))\ .$$
}
\end{ex}

\section{Transgression}

We introduce the transgression of characteristic forms and discuss its basic properties. In the case of foliations we discuss the consequences of the fact that the connections extend a fixed flat partial connection.

\bigskip

We consider the unit interval $I:=[0,1]$ with coordinate $t$.
For $i=0,1$ let $\iota_{i}:*\to I$ be the inclusions of the endpoints of the interval.
Let $M$ be a smooth manifold and $V\to M$ be a vector bundle.
Given two connections $\nabla_{0}$ and $\nabla_{1}$ on $V$ we can consider a connection $\tilde \nabla$ on $\pi^{*}V\to I\times M$, where $\pi:I\times M\to M$ denotes the projection, such that $(\iota_{i}\times \id_{M})^{*}\tilde \nabla=\nabla_{i}$ for $i=0,1$. For example, we could take the linear interpolation $t\pi^{*}\nabla_{1}+(1-t)\pi^{*}\nabla_{0}$.
\bigskip

The integration of forms along the fibre of $\pi:I\times M\to M$ is a map of graded vector spaces
$$\int_{I\times M/M}:\Omega(I\times M)\to \Omega(M)[-1]\ .$$ It induces a map
$$\int_{I\times M/M} :DD^{per}(I\times M)\to DD^{per}(M)[-1]\ .$$ Since the interval $I$ has a non-empty boundary the integration is not a morphism of complexes. In fact,
by Stokes' theorem we have the relation
\begin{equation}\label{dqwdwqdwqdqdw}
(\iota_{1}\times \id_{M})^{*}-(\iota_{0}\times \id_{M})^{*}=d\circ \int_{I\times M/M}+\int_{I\times M/M}\circ d\ .
\end{equation} \begin{ddd}
The transgression of the Chern character form is defined by
$$\widetilde{\ch}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M} \ch(\tilde \nabla)\in DD^{per}(M)^{-1}/\im(d)\ .$$
\end{ddd}
From \eqref{dqwdwqdwqdqdw} and the facts that the Chern character forms are closed and natural (see \eqref{wqdqwdqwdwqdwqwqdqd})
we immediately get the identity \begin{equation}\label{hfjwkjehfkjwehfewfewf897987}
d \widetilde{\ch}(\nabla_{1},\nabla_{0})=\ch(\nabla_{1})-\ch(\nabla_{0})\ .
\end{equation}
One can check that transgression is independent of the choice of the connection $\tilde \nabla$ interpolating between $\nabla_{0}$
and $\nabla_{1}$. At this point it is relevant that we consider the transgression as a class modulo exact forms.
Furthermore, we have the identities \begin{equation}\label{fewfwefwefwefew32434234234}
\widetilde{\ch}(\nabla_{1},\nabla_{0})+\widetilde{\ch}(\nabla_{2},\nabla_{1})+\widetilde{\ch}(\nabla_{0},\nabla_{2})=0\ , \quad \widetilde{\ch}(\nabla_{1},\nabla_{0})+\widetilde{\ch}(\nabla_{0},\nabla_{1})=0\ .
\end{equation}
In order to see e.g. the first equality in \eqref{fewfwefwefwefew32434234234} one can integrate the Chern form of an interpolation between the three connections over a two-simplex.
\begin{rem}{\rm If we choose a hermitean metric on $V$, then we can form the adjoint connections. From
\eqref{ccewcwecweewwerewr} we get the relation \begin{equation}\label{qwdqwdqwdwqdqwdwqdqwdqwd}
\overline{\widetilde{\ch}(\nabla_{1},\nabla_{0})} =\widetilde{\ch}(\nabla^{*}_{1},\nabla^{*}_{0})\ .
\end{equation}
}\end{rem}

\bigskip

We now assume that $(M,\cF)$ is foliated and that the connections $\nabla_{i}$ for $i=0,1$ extend the same flat partial connection $\nabla^{I}$.
Then we can equip $I\times M$ with the foliation $T_{\C}I\boxplus \cF$ introduced in Example \ref{dekldlqwdqwdqwd}.
We can furthermore find an interpolation $\tilde \nabla$ which extends the flat partial connection $\pi^{*}\nabla^{I}$, e.g. the linear interpolation.

\bigskip

We now observe that the integration preserves the filtration, i.e.
$$\int_{I\times M/M}:F^{p}\Omega^{k}(I\times M,T_{\C}I\boxplus \cF)\to F^{p}\Omega^{k-1}(M,\cF) \ .$$
Hence we get an induced map
$$\int_{I\times M/M}:DD^{-}(I\times M,T_{\C}I\boxplus \cF)\to DD^{-}(M,\cF)[-1]\ .$$

\begin{ddd} Let $(M,\cF)$ be a foliated manifold and $(V,\nabla^{I})$ be a complex vector bundle with a flat partial connection on $M$. If $\nabla_{0}$ and $\nabla_{1}$ are two connections on $V$ extending $\nabla^{I}$, then we define transgression of the Chern character form by
$$\widetilde{\ch^{-}}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M} \ch^{-}(\tilde \nabla)\in DD^{-}(M,\cF)^{-1}/\im(d)\ .$$
\end{ddd}

Note again, that $\widetilde{\ch^{-}}(\nabla_{1},\nabla_{0})$ is independent of the choice of the interpolation $\tilde \nabla$.

\begin{ex}{\rm We consider a foliated manifold $(M,\cF)$ and a complex vector bundle $V \to M$.
If $p\in \nat$ is such that $p>\mathrm{codim}(\cF)$, then we have $F^{p}\Omega^{2p}(M)=0$.

\bigskip

Assume that $\nabla^{I}_{0}$ and $\nabla^{I}_{1}$ are two flat partial connections on a complex vector bundle $V$ and let $\nabla_{0}$, $\nabla_{1}$ be corresponding extensions. If $p>\mathrm{codim}(\cF)$, then
$\widetilde{\ch}_{2p}(\nabla_{1},\nabla_{0})$ is closed since by \eqref{dqwdwqdwqdqdw} and Corollary \ref{fwefwefewfewfewfewf2434ewfewfwefewf} its differential belongs to $F^{p}\Omega^{2p}(M)=0$.
Its cohomology class does not depend on the choice of the extensions $\nabla_{1}$ and $\nabla_{0}$.
We therefore get a secondary characteristic class $$c(\nabla_{1}^{I},\nabla_{0}^{I}):=\widetilde{\ch}_{2p}(\nabla_{1},\nabla_{0})\in H^{2p-1}(M;\C)\ .$$
}\end{ex}

\begin{ex}{\rm The following construction generalizes the Kamber-Tondeur classes (introduced in this form in \cite{MR1303026}) to the foliated case.
Let $\nabla^{I}$ be a flat partial connection. If we choose a hermitean metric $h^{V}$ on $V$, then we can define an adjoint flat partial connection $\nabla^{I,*}$.
It is characterized by the relation
$$d^{\cF}h(\phi,\psi)=h(\nabla^{I}\phi,\psi)+h(\phi,\nabla^{I,*}\psi)\ , \quad \phi,\psi\in \Gamma_{c}(M,V)\ .$$

Let $\nabla$ be an extension of $\nabla^{I}$. Then $\nabla^{*}$ extends $\nabla^{I,*}$.
We consider the form $$\widetilde{\ch}_{2p}(\nabla,\nabla^{*})\in F^{p}\Omega^{2p-1}(M,\cF)/\im(d)\ .$$
By \eqref{qwdqwdqwdwqdqwdwqdqwdqwd} and \eqref{fewfwefwefwefew32434234234} we have the relation
\begin{equation}\label{dewdwedwedewded}
\overline{\widetilde{\ch}_{2p}(\nabla,\nabla^{*})}= \widetilde{\ch}_{2p}(\nabla^{*},\nabla)= -\widetilde{\ch}_{2p}(\nabla,\nabla^{*})\ ,
\end{equation} i.e. the form $\widetilde{\ch}_{2p}(\nabla,\nabla^{*})$ is imaginary.

\bigskip

For $p>\mathrm{codim}(\cF)$ the class
$$c_{2p-1}(\nabla^{I}):=c(\nabla^{I},\nabla^{I,*})\in H^{2p-1}(M;\C) $$
does not depend on the choice of the hermitean metric.
By \eqref{dewdwedwedewded} it is imaginary, i.e. it belongs to the real subspace $iH^{2p-1}(M;\R)\subseteq H^{2p-1}(M;\C) $.
\bigskip

We can apply this construction to the bundle $\cF^{\perp}$ with its canonical flat partial connection $\nabla^{I,\cF^{\perp}}$, see Example \ref{fklwefjwefewf}. The class \begin{equation}\label{kfkwejwlkefjlwekfjewfewfopipoi234}
c_{2p-1}(\nabla^{I,\cF^{\perp}})\in H^{2p-1}(M;\C)
\end{equation} is closely related to the Godbillon-Vey class of the foliation. If the foliation $\cF$ is real, then we can explain the place of this invariant in the classification of characteristic classes for foliations. See Remark \ref{ergegojerglerogergeg}, in particular \eqref{ewfwefewfwf432342344123}.

}
\end{ex}

Let $V\to M$ be a real bundle. Then using a similar notation as above we can define
\begin{equation}\label{nkdqlwdwqddwqdj09809}
\widetilde{\hA}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M}\hA(\tilde \nabla)\in DD^{per}(M)^{-1}/\im(d)\ .\end{equation}
We have
\begin{equation}\label{1e1h2ekj12ej12ke21hek2eh}d \widetilde{\hA}(\nabla_{1},\nabla_{0})=\hA(\nabla_{1})-\hA(\nabla_{0})\ .\end{equation} The transgression is independent of the choice of the connection $\tilde \nabla$. Furthermore, we have the identities $$\widetilde{\hA}(\nabla_{1},\nabla_{0})+\widetilde{\hA}(\nabla_{2},\nabla_{1})+\widetilde{\hA}(\nabla_{0},\nabla_{2})=0\ , \quad \widetilde{\hA}(\nabla_{1},\nabla_{0})+\widetilde{\hA}(\nabla_{0},\nabla_{1})=0\ . $$

Finally, if $\nabla_{1}$ and $\nabla_{0}$ extend the same flat partial connection, then we can define
$$\widetilde{\hA^{-}}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M}\hA^{-}(\tilde \nabla)\in DD^{-}(M,\cF)^{-1}/\im(d)\ .$$

We now assume that the foliation $\cF$ is real.
Let $g^{TM}$ be a Riemannian metric on $M$.
We get a decomposition $TM\cong \cF_{\R}\oplus \cF_{\R}^{\perp}$ and choose a connection $\nabla^{\cF_{\R}^{\perp}}$ extending $\nabla^{I,\cF_{\R}^{\perp}}$. We further assume that $\cF_{\R}$ has a stable framing $s $. Let $\nabla^{\cF_{\R},triv}$ be the associated stable trivial connection on $\cF_{\R}$. We have forms
$$\widetilde{\hA}(\nabla^{LC},\nabla^{\cF_{\R},triv}\oplus \nabla^{\cF_{\R}^{\perp}})\in DD^{per,-1}(M) \ , \quad \hA(\nabla^{\cF_{\R},triv}\oplus \nabla^{\cF_{\R}^{\perp}})=\hA(\nabla^{\cF_{\R}^{\perp}})\in DD^{-,0}_{cl}(M)\ .$$

\section{Differential $K$-theory}\label{jfkelwfwefewfewfewfewfwef}

In this section we recall some basic features of the Hopkins-Singer version of differential complex $K$-theory.

\bigskip

References for the following material are the foundational paper by Hopkins-Singer \cite{MR2192936}, but also \cite{MR2732065} and \cite{MR2664467}. For differential orientations and Umkehr maps we refer to \cite{MR2664467}, \cite{Freed-Lott}, and \cite{2012arXiv1208.3961B}.

\subsection{Basic structures}

We describe the differential extension $\widehat{KU}^{*}$ of the generalized cohomology theory $\mathbf{KU}^{*}$. Here $ \mathbf{KU}^{*}$ is the periodic topological complex $K$-theory which is represented by the spectrum $\mathbf{KU}$.
For every $p\in \Z$ we have a contravariant functor
$$\widehat{KU}^{p}:\Mf^{op}\longrightarrow \Ab $$
from smooth manifolds to abelian groups. This functor is connected with periodic topological complex $K$-theory $\mathbf{KU}^{*}$ via a transformation $$I:\widehat{KU}^{p}\to \mathbf{KU}^{p}$$ of abelian group valued functors. The transformation $I$
maps differential $K$-theory classes to their underlying topological $K$-theory classes.
Furthermore, differential $K$-theory is connected with differential forms through natural transformations $R$ and $a$.
The curvature $R$ is a natural transformation
$$ R:\widehat{KU}^{p}\to Z^{p}(DD^{per}) \ .$$
In particular, if $x\in \widehat{KU}^{p}(M)$, then $R(x)\in Z^{p}(DD^{per}(M))$
is a differential form representing the Chern character of the underlying topological class $I(x)$.
The transformation $$a: DD^{per,p-1}/\im(d) \to \widehat{KU}^{p}$$ encodes the secondary information contained in differential $K$-theory classes.
All these structures and their compatibilities are nicely encoded in the following commutative diagram, also called the differential cohomology diagram \cite{MR2365651}:
$$\xymatrix{& DD^{per,p-1} /\im(d)\ar[dr]^{a}\ar[rr]^{d}&&Z^{p}(DD^{per} )\ar[dr]^{Rham}&\\H^{p-1}(DD^{per} ) \ar[ur]\ar[dr]&&\widehat{KU}^{p} \ar[ur]_{R}\ar[dr]^{I}&&H^{p}(DD^{per} ) \\&\mathbf{KU}\C/\Z^{p-1} \ar[ur]\ar[rr]^{Bockstein}&&\mathbf{KU}^{p} \ar[ur]^{\ch}&}\ .$$
Its upper and lower parts are segments of long exact sequences, and the diagonals are exact at the center.

\bigskip

The flat part of differential $K$-theory is defined as the kernel of the curvature transformation $R$: $$\widehat{KU}^{p}_{flat}:=\ker(R:\widehat{KU}^{p}\to Z^{p}(DD^{per}))\ .$$ It is canonically isomorphic to $\C/\Z$-$K$-theory (with a shift):\begin{equation}\label{eq700hhh}\widehat{KU}^{p}_{flat} \cong \mathbf{KU} \C/\Z^{p-1} \ .\end{equation}

\bigskip

The sequence
\begin{equation}\label{eq700}\mathbf{KU}^{p-1}\stackrel{\ch}{\to} DD^{per,p-1}/\im(d)\stackrel{a}{\to} \widehat{KU}^{p}\stackrel{I}{\to} \mathbf{KU}^{p}\to 0\end{equation}
is exact.

\bigskip

The differential $K$-theory of a point is given by
\begin{equation}\label{djhqkjwdqwdq9879}\widehat{KU}^{p}(*)\cong \left\{\begin{array}{cc} \Z&\mbox{$p$ is even}\\
\C/\Z&\mbox{$p$ is odd}\end{array}\right.\ .\end{equation}

\bigskip

Differential $K$-theory is not homotopy invariant. The deviation from homotopy invariance is quantified by the homotopy formula. If $\hat x\in \widehat{KU}^{p}([0,1]\times M)$, then the homotopy formula states that
\begin{equation}\label{homotopyformula}(\iota_{1}\times \id_{M})^{*}\hat x-(\iota_{0}\times \id_{M})^{*}\hat x=a(\int_{[0,1]\times M/M} R(\hat x))\ . \end{equation}

\subsection{The cycle map}

A complex vector bundle with connection $(V,\nabla)$ on a manifold $M$ gives rise to a differential $K$-theory class
$[V,\nabla]\in \widehat{KU}^{0}(M)$ such that \begin{equation}\label{dkdqwdwqdqwdwqdkl8997}R([V,\nabla])=\ch(\nabla)\in Z^{0}(DD^{per}(M))\ , \quad I([V,\nabla])=[V]\in \mathbf{KU}^{0}(M)\ . \end{equation}

Let $M\mapsto \pi_{0}({\mathbf{Vect}}^{\nabla}(M))$ denote the contravariant functor which associates to the manifold $M$ the commutative monoid (induced by the direct sum) of isomorphism classes of pairs $(V,\nabla)$ of vector bundles with connection on $M$ and to a smooth map between manifolds $f:M\to M^{\prime}$ the pull-back $f^{*}$.
The additive natural transformation
$$[\dots]:\pi_{0}({\mathbf{Vect}}^{\nabla})\to \widehat{KU}^{0}$$ is called the cycle map and fits into the commuting diagram
$$\xymatrix{&Z^{0}(DD^{per})\\\pi_{0}({\mathbf{Vect}}^{\nabla})\ar[r]^{[\dots]}\ar[ur]^{[V,\nabla]\mapsto \ch(\nabla)}\ar[dr]_{[V,\nabla]\mapsto[V]}&\widehat{KU}^{0}\ar[u]^{R}\ar[d]_{I}\\&\mathbf{KU}^{0}}\ .$$

For compact manifolds $M$ the cycle map is known to be surjective \cite{MR2732065}. Assume that $\nabla_{0}$ and $ \nabla_{1}$ are two connections on the bundle $V$. Then as a consequence of the homotopy formula \eqref{homotopyformula} we have
\begin{equation}\label{kdqklwdjqwlkdjqlwdkwqd89798}[V,\nabla_{1}]-[V,\nabla_{0}]=a(\widetilde{\ch}(\nabla_{1},\nabla_{0}))\ .\end{equation}

\subsection{Differential orientations and Umkehr maps}\label{wlekfjwelfjewlfwef123}

Let $\pi:W\to B$ be a proper submersion such that the vertical bundle $T^{v}\pi$ has a $Spin^{c}$-structure. Then $\pi$ is equipped with an orientation $o$ (called the Atiyah-Bott-Shapiro orientation \cite{MR0167985}) for the cohomology theory $\mathbf{KU}^{*}$ and admits an Umkehr map
$$\pi^{o}_{!}:\mathbf{KU}^{p}(W)\to \mathbf{KU}^{p-d}(B)\ ,$$ where $d:=\dim(W)-\dim(B)$ is the dimension of the fibre (assume for simplicity that $B$ is connected). Since $\mathbf{KU}\C/\Z$ is a $\mathbf{KU}$-module spectrum we also have an integration
\begin{equation}\label{hjkfhjkhkjhkjefwefefwe7987}
\pi^{o}_{!}:\mathbf{KU}\C/\Z^{p}(W)\to \mathbf{KU}\C/\Z^{p-d}(B)\ .
\end{equation}

The $\mathbf{KU}$-orientation $o$ determines a cohomology class
$$\hA(o)\in H^{0}(DD^{per}(W))$$ such that the Riemann-Roch theorem holds true:
$$\xymatrix{\mathbf{KU}^{p}(W)\ar[d]^{\pi_{!}^{o}}\ar[r]^{\ch}&H^{p}(DD^{per}(W))\ar[d]^{\int_{W/B}\hA(o)\cup \dots}\\ \mathbf{KU}^{p-d}(B)\ar[r]^{\ch}&H^{p-d}(DD^{per}(B))}$$

Differential refinements of $\mathbf{KU}$-orientations have been studied in detail in \cite{MR2664467}, \cite{Freed-Lott}, see also
\cite{2012arXiv1208.3961B} for a more homotopy-theoretic approach.
In order to refine the $\mathbf{KU}$-orientation $o$ to a $\widehat{KU}$-orientation $\hat o$ we must choose additional geometric structures. First of all we choose a metric on the vertical tangent bundle $T^{v}\pi$ and a horizontal distribution $T^{h}\pi$, i.e. a complement of the vertical bundle in $TW$. These structures induce a vertical Levi-Civita connection $\nabla^{T^{v}\pi}$, see e.g.\ \cite[Prop. 10.2]{MR2273508}. In order to fix $\hat o$ we must further choose a $Spin^{c}$-extension $\tilde \nabla^{T^{v}\pi}$ of $\nabla^{T^{v}\pi}$. The $\widehat{KU}$-orientation $\hat o$
gives rise to an Umkehr map (see \cite[3.2.3]{MR2664467})
$$\pi^{\hat o}_{!}:\widehat{KU}^{p}(W)\to \widehat{KU}^{p-d}(B)\ .$$ The differential orientation further provides a form $$\hA(\hat o)\in Z^{0}(DD^{per}(W))$$ representing the class $\hA(o)$. The Umkehr map in $\widehat{KU}$-theory fits into the following commutative diagram \begin{equation}\label{r23r23r23r23r23r235435346546}
\hspace{-1cm}\xymatrix{\mathbf{KU}\C/\Z^{p-1}(W)\ar[d]^{\pi_{!}^{o}}\ar@/^1cm/[rr]&DD^{per}(W)^{p-1}/\im(d)\ar[d]^{\int_{W/B}\hA(\hat o)\wedge \dots}\ar[r]^(0.6){a}&\widehat{KU}^{p}(W)\ar[d]^{\pi_{!}^{\hat o}}\ar[r]^{R}\ar@/^1cm/[rr]^{I}&Z^{p}(DD^{per}(W))\ar[d]^{\int_{W/B}\hA(\hat o)\cup \dots}&KU^{p}(W)\ar[d]^{\pi_{!}^{o}}\\\mathbf{KU}\C/\Z^{p-d-1}(B)\ar@/_1cm/[rr]&DD^{per}(B)^{p-d-1}/\im(d)\ar[r]^(0.6){a}&\widehat{KU}^{p-d}(B)\ar[r]\ar@/_1cm/[rr]_{I}&Z^{p-d}(DD^{per}(B))&KU^{p-d}(B)}
\end{equation}

The set of $\widehat{KU}$-orientations refining an underlying topological $\mathbf{KU}$-orientation $o$ is a torsor over $DD^{per}(W)^{-1}/\im(d)$ such that for $\alpha\in DD^{per}(W)^{-1}/\im(d)$ we have
\begin{equation}\label{lwefwwfewfewfefewfewfewffwe}
\hA(\hat o+\alpha)=\hA(\hat o)+d\alpha .\end{equation}

We have the rules (all following from \cite[(17)]{MR2664467})
\begin{equation}\label{kdklqwjdlqwjdwldqwdqwdqd}\pi_{!}^{\hat o+\alpha}(x)=\pi_{!}^{\hat o}(x)+[\int_{M}\alpha\wedge R(x)]_{\C/\Z}\ , \quad \pi_{!}^{\hat o}(a(\omega))=[\int_{M} \hA(\hat o)\wedge \omega]_{\C/\Z} \ .\end{equation}

If $\pi=\pi_{1}\circ \pi_{0}$ is a composition of proper submersions and $\hat o_{i}$ are $\widehat{KU}$-orientations of $\pi_{i}$, then we can define a composed orientation $\hat o=\hat o_{1}\circ \hat o_{0}$ for $\pi$ in a natural way such that
\begin{equation}\label{e23e23e3e23e32e32e2}
\pi_{!}^{\hat o}=\pi^{\hat o_{1}}_{1,!}\circ \pi_{0,!}^{\hat o_{0}}\ .
\end{equation}
If
$$\xymatrix{W^{\prime}\ar[d]^{\pi^{\prime}}\ar[r]^{g}&W\ar[d]^{\pi}\\B^{\prime}\ar[r]^{f}&B}$$
is a cartesian diagram and $\hat o$ is a differential orientation of $\pi$, then we can define a $\widehat{KU}$-orientation $\hat o^{\prime}$ of $\pi^{\prime}$ such that
$$\pi^{\prime,\hat o^{\prime}}_{!}\circ g^{*}=f^{*}\circ \pi_{!}^{\hat o}\ .$$

In order to avoid the additional complexity of the choice of $Spin^{c}$-extensions of connections on real vector bundles with $Spin^{c}$-structures, in the present paper we will work with $Spin$-structures.
If $T^{v}\pi$ has a spin structure, then it has an induced $Spin^{c}$-structure, and a connection
$ \nabla^{T^{v}\pi}$ has a canonical $Spin^{c}$-extension which we take from now on.
If the $\widehat{KU}$-orientation $\hat o$ is defined using the vertical metric and the horizontal distribution as above, then we have \begin{equation}\label{fwefw4343433fewfwf}
\hA(\hat o)=\hA(\nabla^{T^{v}\pi})\ ,\end{equation}
where $\nabla^{T^{v}\pi}$ is the Levi-Civita connection.

\bigskip

Assume that $\pi:W\to B$ is a submersion with fibrewise boundary $\partial \pi:\partial W\to B$.
If $\hat o$ is a $\widehat{KU}$-orientation of $\pi$, then we can define an induced $\widehat{KU}$-orientation $\partial \hat o$ of $\partial \pi$. In this situation we have the bordism formula \cite[Prop. 5.18]{MR2664467}. If $\hat x\in \widehat{KU}^{p}(W)$, then \begin{equation}\label{jkhdkqjwdhqwdwqdqwdqwdqwd789}
\partial\pi^{\partial \hat o}_{!}(\hat x_{|\partial W})=a(\int_{W/B} \hA(\hat o)\wedge R(\hat x ))\ .\end{equation}

\section{The invariant}\label{flkwjefwkejfeflkewjflkefjelkfjelfjewlfewf9790}

Given a closed odd-dimensional real foliated spin manifold $(M,\cF)$ such that $$2\mathrm{codim}(\cF)< \dim(M)$$ with a stable real framing $s$ of $\cF$ and a complex vector bundle $(V,\nabla^{I})$ with flat partial connection we define an invariant $$\rho(M,\cF,\nabla^{I},s)\in \C/\Z\ .$$

\bigskip

In order to define the invariant we first choose the following additional geometric data:
\begin{enumerate}
\item We choose a connection $\nabla$ on $V$ which extends $\nabla^{I}$, see Definition \ref{fjwelfwfewf42343242wf}.
\item We choose an extension $\nabla^{\cF_{\R}^{\perp}}$ of the flat partial connection $\nabla^{I,\cF^{\perp}_{\R}}$, see Example \ref{kjlwefwefwef}.

\item We choose a Riemannian metric $g^{TM}$.
\end{enumerate}

For $n\in \nat$ we let $\underline{\R^{n}}$ denote the trivial $n$-dimensional real vector bundle on $M$.
A stable real framing $s$ of $\cF$ is an isomorphism of real vector bundles
$$s:\cF_{\R}\oplus \underline{\R^{n}}\cong \underline{\R^{m}}$$
for certain choices of $n,m\in \nat$. The framing and the trivial connection on $\underline{\R^{m}}$ induce a connection $\nabla^{s}$ on $\cF_{\R}\oplus \underline{\R^{n}}$.
The Riemannian metric on $M$ further induces an orthogonal splitting $TM\cong \cF_{\R}\oplus \cF_{\R}^{\perp}$ so that we can consider both connections
$\nabla^{LC}\oplus \nabla^{\underline{\R^{n}}}$ and $\nabla^{s}\oplus \nabla^{\cF^{\perp}_{\R}}$ on the same bundle $$TM\oplus \underline{\R^{n}}\cong (\cF_{\R}\oplus \underline{\R^{n}})\oplus \cF_{\R}^{\perp}\ .$$ In particular, we can define the transgression form (see \eqref{nkdqlwdwqddwqdj09809})
$$\widetilde{\hA}(LC,s):=\widetilde{\hA}(\nabla^{LC}\oplus \nabla^{\underline{\R^{n}}},\nabla^{s}\oplus \nabla^{\cF^{\perp}_{\R}})\in DD^{per}(M)^{-1}/\im(d)\ ,$$
where $\nabla^{LC}$ is the Levi-Civita connection on $TM$ associated to the Riemannian metric $g^{TM}$.

\bigskip

We now consider the map $\pi:M\to *$. Since $M$ is closed this is a proper submersion.
Since $M$ is spin, this map has a $\mathbf{KU}$-orientation $o$. The choice of a Riemannian metric refines the orientation $o$ to a $\widehat{KU}$-orientation $\hat o$ (note that the horizontal bundle is the zero bundle), see Subsection \ref{wlekfjwelfjewlfwef123}.

\begin{ddd}\label{flwefjwefewff} Let
$M$ be an odd-dimensional real foliated closed spin manifold, $s$ be a stable real framing of $\cF$, and $\nabla^{I}$ be a flat partial connection on a complex vector bundle on $M$. Assume further that we have fixed $g^{TM}$, $\nabla^{\cF_{\R}^{\perp}}$, and $\nabla$.
Then we define
$$\rho(M,\cF,\nabla^{I},s):=\pi_{!}^{\hat o-\widetilde{\hA}(LC,s)}([V,\nabla])\in \widehat{KU}^{-\dim(M)}(*)\stackrel{\eqref{djhqkjwdqwdq9879}}{\cong} \C/\Z\ .$$
\end{ddd}

In general, this quantity depends on the additional choices $g^{TM}$, $\nabla^{\cF_{\R}^{\perp}}$, and $\nabla$. It will be a consequence of the bordism invariance, that $\rho(M,\cF,\nabla^{I},s)$ is actually independent of these choices provided $2\mathrm{codim}(\cF)<\dim(M)$.

\begin{prop} \label{fklwfjeklfwefewfwf897}
Assume that $(M,\cF)$, $\nabla^{I}$, $s$ as well as $g^{TM}$, $\nabla^{\cF_{\R}^{\perp}}$, and $\nabla$ are as in Definition \ref{flwefjwefewff} with the exception that $M$ is even-dimensional and has a boundary $\partial M$ which is transversal to $\cF$. We further assume that the geometric structures have a product structure near $\partial M$.
Then we have
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M}\hA^{-}(\nabla^{\cF_{\R}^{\perp}})\wedge \ch^{-}(\nabla)]_{\C/\Z}\ .$$
In particular, if $2\mathrm{codim}(\cF)<\dim(M)$, then $\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=0$.
\end{prop}
\proof By \eqref{jkhdkqjwdhqwdwqdqwdqwdqwd789} we have
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M} \hA(\hat o-\widetilde{\hA}(LC,s))\wedge R([V,\nabla])]_{\C/\Z}\ .$$
Using \eqref{lwefwwfewfewfefewfewfewffwe}, \eqref{fwefw4343433fewfwf} and \eqref{dkdqwdwqdqwdwqdkl8997}
we get
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M} (\hA(\nabla^{LC})-d\widetilde{\hA}(LC,s))\wedge \ch(\nabla) ]_{\C/\Z}\ .$$
We apply \eqref{1e1h2ekj12ej12ke21hek2eh} and the fact that $\hA$ is multiplicative in order to rewrite this as
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M} \hA(\nabla^{\cF_{\R}^{\perp}})\wedge \ch(\nabla)]_{\C/\Z}\ .$$
We now use that both $\nabla^{\cF_{\R}^{\perp}}$ and $\nabla$ extend flat partial connections. The associated characteristic forms therefore refine to cycles in $DD^{-}(M)$.
Hence by Example \ref{kjlwefwefwef} and Definition \ref{qldjqwldqwdqwdqwd} we have
$$ \int_{M}\hA(\nabla^{\cF_{\R}^{\perp}})\wedge \ch(\nabla)=\int_{M}\hA^{-}(\nabla^{\cF_{\R}^{\perp}})\wedge \ch^{-}(\nabla) \ .$$
This implies the first assertion.

\bigskip

The integral $\int_{M}$ factorizes over the component in $$DD^{-}(M)(p)^{0}=F^{p}\Omega^{2p}(M)$$ with $p=\dim(M)/2$.
If $\mathrm{codim}(\cF)<p$ we have $F^{p}\Omega^{2p}(M,\cF)=0$ and hence \begin{equation}\label{rferfefref32234324344}
Z^{0}(DD^{-}(M)(p))=0\ .
\end{equation}
This implies the second claim.
\hB

In the following we define the opposite of a framing and a spin structure.
Let $(M,\cF)$ and the stable real framing $s$ of $\cF$ be given. Then we can form the cylinder
$I\times M$ with the foliation $T_{\C}I\boxplus \cF$, see Example \ref{dekldlqwdqwdqwd}. We trivialize $TI\cong I\times \underline{\R}$ using the section $\partial_{t}$, where $t$ is the standard coordinate of the cylinder. Then we write
$T(I\times M)\cong TI\boxplus TM\cong \underline{\R}\boxplus TM$ in order to define the induced spin structure on $I\times M$. Furthermore, the identification $T_{\C}I\boxplus \cF\cong \underline{\C}\boxplus \cF$ provides the stable real framing $I\times s$ of $T_{\C}I\boxplus \cF$. These constructions are made such that
$(M,\cF,s)$ is the boundary of $(I\times M,T_{\C}I\boxplus \cF,I\times s)$ at the upper face of the cylinder corresponding to $1\in I$. \begin{ddd} We define $(M^{op},\cF,s^{op})$ to be the boundary of the cylinder at $0\in I$. \end{ddd} Here $M^{op}$ indicates that $M$ is equipped with the opposite spin structure.

\bigskip

We adopt all the assumptions made in Definition \ref{flwefjwefewff} and fix choices for $\nabla$, $\nabla^{\cF_{\R}^{\perp}}$ and $g^{TM}$. These can be extended constantly over the cylinder. In this case $\hA^{-}(\nabla^{I\times \cF_{\R}^{\perp}})\wedge \ch^{-}(\pr^{*}\nabla)$
is pulled back from $M$ and has no $dt$-component. Hence its integral over $I\times M$ vanishes.
\begin{kor}
$$\rho(M,\cF,\nabla^{I},s)=-\rho(M^{op},\cF,\nabla^{I},s^{op})\ .$$
\end{kor}

Assume now that we have two choices for $\nabla$, $\nabla^{\cF_{\R}^{\perp}}$ and $g^{TM}$. Then we can again consider the cylinder over $M$ and interpolate between these choices. The second assertion of Proposition \ref{fklwfjeklfwefewfwf897} and the vanishing \eqref{rferfefref32234324344} for $p=\frac{\dim(M)+1}{2}$ imply:
\begin{kor}\label{djwqdlqwjdkwqdwqdqd} If $2\mathrm{codim}(\cF)< \dim(M)$, then
$\rho(M,\cF,\nabla^{I},s)$ is independent of the choices of $\nabla$, $\nabla^{\cF_{\R}^{\perp}}$ and $g^{TM}$.
\end{kor}

\section{A spectral geometric interpretation}\label{fkweflwefewfewfewfwfwfw}

In this section we express $\rho(M,\cF,\nabla^{I},s)$ in terms of spectral invariants of Dirac operators.

\bigskip

Let $M$ be a closed spin manifold with a Riemannian metric $g^{TM}$ and $\bV:=(V,h^{V},\nabla^{u})$ be a hermitean vector bundle with a unitary connection. Then we can form the twisted Dirac operator $\slashed{D}\otimes \bV$. It is a first order elliptic differential operator which acts on the space of sections $\Gamma(M,S(TM)\otimes V)$, where $S(TM)$ is the spinor bundle of $M$. It is symmetric with respect to the natural $L^{2}$-metric. Its spectrum is real and consists of eigenvalues of finite multiplicity accumulating at $\pm\infty$. By Weyl's asymptotics the number of eigenvalues with absolute value $\le R$ (counted with multiplicity) grows as $R^{\dim(M)}$.
The $\eta$-function of this operator was introduced by Atiyah-Patodi-Singer \cite{MR0397797} and is defined by
$$\eta(\slashed{D}\otimes \bV)(s)=\sum_{\lambda\not=0} m_{\lambda} \sign(\lambda)|\lambda|^{-s}\ ,$$
where the sum is taken over the non-zero eigenvalues of $\slashed{D}\otimes \bV$ and $m_{\lambda}$ denotes the multiplicity. The sum converges for $\Ree(s)>\dim(M)$. It has been further shown in \cite{MR0397797} that the $\eta$-function has a meromorphic continuation to all of $\C$ which is regular at $s=0$.

\begin{ddd} The $\eta$-invariant of $\slashed{D}\otimes \bV$ is defined by $\eta(\slashed{D}\otimes \bV):=\eta(\slashed{D}\otimes \bV)(0)$. We further define the reduced $\eta$-invariant
$$\xi(\slashed{D}\otimes\bV):=[\frac{\eta(\slashed{D}\otimes \bV)+\dim(\ker(\slashed{D}\otimes \bV))}{2}]_{\C/\Z}\in \C/\Z\ .$$
\end{ddd}

We consider the projection $\pi:M\to *$. The spin structure on $M$ and the Riemannian metric $g^{TM}$ induce a $\widehat{KU}$-orientation $\hat o$, see Subsection \ref{wlekfjwelfjewlfwef123}. The geometric bundle
$\bV$ defines a class $[V,\nabla^{u}]\in \widehat{KU}^{0}(M)$. Using the identification \eqref{djhqkjwdqwdq9879} we get by \cite[Cor. 5.5]{MR2664467}

\begin{prop}\label{kqwdkjqdwqdwd}
$$\pi_{!}^{\hat o}([V,\nabla^{u}])=\xi(\slashed{D}\otimes\bV)\ .$$
\end{prop}

We now adopt the assumptions of Definition \ref{flwefjwefewff}.
We further choose a hermitean metric $h^{V}$ and a unitary connection $\nabla^{u}$ and set $\bV:=(V,h^{V},\nabla^{u})$.

\begin{prop}\label{fjwfkljfklfjwelkfjkewjfewlkf9798237982749237432243}
We have
$$\rho(M,\cF,\nabla^{I},s)=\xi(\slashed{D}\otimes\bV)-[\int_{M} \widetilde{\hA}(LC,s)\wedge \ch(\nabla)-
\int_{M}\hA(\nabla^{LC})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z}\ .$$
\end{prop}
\proof We use the rules \eqref{kdklqwjdlqwjdwldqwdqwdqd} for $x=[V,\nabla]$, $\alpha=-\widetilde{\hA}(LC,s)$ and $\omega=\widetilde{\ch}(\nabla,\nabla^{u})$ and equations \eqref{dkdqwdwqdqwdwqdkl8997}, \eqref{fwefw4343433fewfwf} and \eqref{kdqklwdjqwlkdjqlwdkwqd89798}. \hB

\section{Special cases}\label{jbjkhwekfewfwefewf89798}

\subsection{Adams $e$-invariant}\label{kdljlqwdqwdqwd}

We consider the case of the maximal foliation $\cF_{max}=T_{\C}M$ on a closed manifold $M$, see Example \ref{lkjwqdwdqwdqwdqwd}. Then a stable framing $s$ of $\cF_{max,\R}$ is a stable framing of $TM$ and
$(M,s)$ is a cycle for the framed bordism class $[M,s]\in \Omega^{fr}_{\dim(M)}$. The Pontrjagin-Thom construction identifies the framed bordism theory $\Omega^{fr}_{*}$ with the homology theory represented by the sphere spectrum. In particular, its coefficients are the stable homotopy groups of the sphere $\Omega^{fr}_{*}(*)\cong \pi_{*}^{s}$. In his study of the $J$-homomorphism Adams defined in \cite{MR0198470} a homomorphism
$$e^{Adams}_{\C}: \pi^{s}_{k}\to \C/\Z$$
for odd $k\in \nat$. A spectral geometric interpretation of $e^{Adams}_{\C}$ has been given by Atiyah-Patodi-Singer in
\cite{MR0397797}. In the following we describe $e^{Adams}_{\C}$ using differential $KU$-theory.

The stable framing $s$ induces a spin structure on $M$. Given a Riemannian metric
$g^{TM}$ we obtain a $\widehat{KU}$-orientation $\hat o$ of $\pi:M\to *$. It has been observed in \cite[Prop. 5.22]{MR2664467} that the $\widehat{KU}$-orientation
$$\hat o_{s}:=\hat o-\widetilde{\hA}(LC,s)$$ of $\pi$
does not depend on the choice of the Riemannian metric.

Let $\beins\in \widehat{KU}^{0}(M)$ be the class of the trivial one-dimensional bundle $(\underline{\C},\nabla^{triv})$.
Then by \cite[Lemma 5.24]{MR2664467} we have using \eqref{djhqkjwdqwdq9879}

\begin{equation}\label{rewrewrrwr324324234234}
e^{Adams}_{\C}([M,s])= \pi_{!}^{\hat o_{s}}(\beins)\ .
\end{equation}
The following corollary follows immediately from Definition \ref{flwefjwefewff}.
\begin{kor}
We have
$$\rho(M,\cF_{max},\nabla^{triv},s)= e^{Adams}_{\C}([M,s])\ .$$
\end{kor}

Using the first identity in \eqref{kdklqwjdlqwjdwldqwdqwdqd} we get the expression (to be used later) \begin{equation}\label{kljklqwdwqd}e^{Adams}_{\C}([M,s])= \pi_{!}^{\hat o}(\beins) -[\int_{M} \widetilde{\hA}(LC,s)]_{\C/\Z} \end{equation} for the $e$-invariant.

\subsection{The $\rho$-invariant of flat bundles}

Assume now that $(V,\nabla)$ is a flat bundle on a closed odd-dimensional spin manifold $M$. The spin structure on $M$ equips the map $\pi:M\to *$ with an orientation $o$ for $\mathbf{KU}$. We observe that $$[V,\nabla^{V}]-\dim(V)\beins\in \widehat{KU}^{0}_{flat}(M)\cong \mathbf{KU}\C/\Z^{-1}(M)\ .$$
Hence we can apply the integration map \eqref{hjkfhjkhkjhkjefwefefwe7987} to this difference.
\begin{ddd}
We define the $\rho$-invariant of $\nabla$ by
$$\rho(\nabla):=\pi_{!}^{o}([V,\nabla]-\dim(V)\beins)\in \C/\Z \ .$$
\end{ddd}

If we choose a Riemannian metric, then we get a refinement of $o$ to a $\widehat{KU}$-orientation $\hat o$ of $\pi$. Using the integration in differential $K$-theory and \eqref{r23r23r23r23r23r235435346546} we can write
\begin{equation}\label{jhjkhjkehfkewhfkfhkewjhfkewjfewfwf}
\rho(\nabla)=\pi_{!}^{\hat o}([V,\nabla])-\dim(V)\pi_{!}^{\hat o}(\beins) \ .
\end{equation}

\begin{rem}{\rm Assume that $h^{V}$ is a hermitean metric on $V$ preserved by $\nabla$. Then we can form the geometric bundle $\bV=(V,\nabla,h^{V})$. As a consequence of Proposition \ref{kqwdkjqdwqdwd} we have \begin{equation}\label{jhdjkehdkwwedewd}
\pi_{!}^{\hat o}([V,\nabla])-\dim(V)\pi_{!}^{\hat o}(\beins)=\xi(\slashed{D}\otimes \bV)-\dim(V)\xi(\slashed{D})\ .
\end{equation}
Combining this with \eqref{jhjkhjkehfkewhfkfhkewjhfkewjfewfwf} we get the statement of the index theorem for flat vector bundles by Atiyah-Patodi-Singer \cite[Thm. 5.3]{MR0397799}:
$$ \xi(\slashed{D}\otimes \bV)-\dim(V)\xi(\slashed{D})=\rho(\nabla)\ .$$
Observe that this is really a non-trivial statement. The left-hand side of this equality is the analytic index of the flat bundle, and the right-hand side is the topological index since we have defined the $\rho$-invariant using the topological integration in $\mathbf{KU}\C/\Z$-theory.
}
\end{rem}

Let us now assume that the spin structure on $M$ is induced by a stable framing $s$ of $TM$.
\begin{lem}
We have
\begin{equation}\label{mkmxlkmlqwxqwx}\rho(M,\cF_{max},\nabla,s)=\rho(\nabla)+\dim(V) e^{Adams}_{\C}([M,s])\ .\end{equation}
\end{lem}
\proof Since $\nabla$ is flat we have
\begin{equation}\label{dqjwqwddqwdqwdwqd}\pi_{!}^{\hat o-\widetilde{\hA}(LS,s)}([V,\nabla])=\pi_{!}^{\hat o}([V,\nabla])-\dim(V)[\int_{M} \widetilde{\hA}(LS,s)]_{\C/\Z}\ .\end{equation}
We first use \eqref{kljklqwdwqd}
in order to replace the second term in \eqref{dqjwqwddqwdqwdwqd} and then apply \eqref{jhjkhjkehfkewhfkfhkewjhfkewjfewfwf}. \hB

\begin{rem}{\rm The decomposition \eqref{mkmxlkmlqwxqwx} of the invariant $\rho(M,\cF_{max},\nabla,s)$ is very interesting.
A priori this quantity depends on the isomorphism class of the flat bundle $(V,\nabla)$.
But we now observe that $\rho(M,\cF_{max},\nabla,s)$ is actually an invariant of the class $[V,\nabla]^{alg}\in {\mathbf{K}}(\C)^{0}(M)$ represented by $(V,\nabla)$.
This fact has already been shown in \cite{westburyjones} as we will explain in the following.

Fix a base point $m\in M$, choose an identification $V_{m}\cong \C^{\dim(V)}$, and let $\alpha:\pi_{1}(M,m)\to GL(\dim(V),\C)$ denote the holonomy representation associated to the flat connection $\nabla$ on $V$.
Then the quantity $e(M,\alpha)\in \C/\Z$ introduced by \cite{westburyjones} (for $M$ a homology sphere) can be written in the form (compare \cite[Thm. A]{westburyjones})
$$e(M,\alpha)=\rho(\nabla)\ .$$
The number $e(M,\alpha)\in \C/\Z$ only depends on the algebraic $K$-theory class of $M$
determined by $\alpha$ which in our notation is $[V,\nabla]^{alg}\in {\mathbf{K}}(\C)^{0}(M)$.
Since clearly $\dim(V)$ is an invariant of $[V,\nabla]^{alg}$ as well, the combination
$$\rho(M,\cF_{max},\nabla,s)=e(M,\alpha)+\dim(V) e^{Adams}_{\C}([M,s])$$
only depends on the class $[V,\nabla]^{alg}$ of $(V,\nabla)$.

In Section \ref{keklwfewfewfewf} we will show a much stronger result. We will see that $ \rho(M,\cF_{max},\nabla,s)$
only depends on the class $$\pi^{o_{s}}_{!}([V,\nabla]^{alg})\in {\mathbf{K}}(\C)^{-\dim(M)}(*)\cong K_{\dim(M)}(\C)\ ,$$ where ${\mathbf{K}}(\C)^{*}$ is the cohomology theory represented by the algebraic $K$-theory spectrum of $\C$, and $o_{s}$ is the orientation of $\pi:M\to *$ given by the framing $s$ for stable cohomotopy (and hence for every cohomology theory since it is a module theory over stable cohomotopy).

The formula \eqref{mkmxlkmlqwxqwx} can be compared with the formulas in \cite[Thm. 5.5]{2011arXiv1103.4217B}. We conclude that $\rho(M,\cF_{max},\nabla,s)$ can be expressed in terms of the universal $\eta$-invariant introduced in that reference. }
\end{rem}

\subsection{A families $e$-invariant}

Let $$q:W\to B$$ be a proper submersion of relative dimension $p:=\dim(W)-\dim(B)>0$ and consider the vertical foliation $\cF^{v}=T^{v}_{\C}q$ on $W$, see Example \ref{djqlwdqwdqwdq}. This foliation is real. A framing $s$ of $\cF_{\R}^{v}$ induces an orientation $o_{q}$ of the map $q$ for the framed bordism cohomology theory $\Omega^{fr,*}$. We get a class $$[W\stackrel{q}{\to} B,s]=q^{o_{q}}_{!}(1_{S})\in \Omega^{fr,-p}(B)\ ,$$ where $1_{S}\in \Omega^{fr,0}(W)$ is the unit. The construction \eqref{rewrewrrwr324324234234} of Adams' $e$-invariant can be extended from $B=*$ to general $B$
as a map
$$e^{Adams}_{\C}:\Omega^{fr,-p}(B)\to \mathbf{KU}\C/\Z^{-p-1}(B)\ .$$
According to \cite[Definition 5.23]{MR2664467} its value on the class $[W\stackrel{q}{\to} B,s]$ is given by
\begin{equation}\label{ewfwfewfwfwf23434}
e^{Adams}_{\C}([W\stackrel{q}{\to} B,s]):=q_{!}^{\hat o_{s}}(\beins)\in \widehat{KU}^{-p}(B)_{flat}\cong \mathbf{KU}\C/\Z^{-p-1}(B)\ ,
\end{equation}
where $\hat o_{s}$ is the $\widehat{KU}$-orientation of $q$ induced by the framing, see Remark \ref{jdlqwkdjlqwdwqd}.

\begin{rem}\label{jdlqwkdjlqwdwqd}{\rm The construction of $\hat o_{s}$ is similar to the one in Subsection \ref{kdljlqwdqwdqwd}.
The vertical framing induces a spin structure. We choose a fibrewise Riemannian metric and a horizontal distribution.
Then we get a vertical Levi-Civita connection $\nabla^{T^{v}q}$. As explained in Subsection \ref{wlekfjwelfjewlfwef123} we get a $\widehat{KU}$-orientation $\hat o$. Furthermore, using the trivial connection induced by the framing, we can define the transgression
$$\widetilde{\hA}(\nabla^{T^{v}q},s)\in DD^{per}(W)^{-1}/\im(d)\ .$$
The $\widehat{KU}$-orientation
$$\hat o_{s}:=\hat o-\widetilde{\hA}(\nabla^{T^{v}q},s)$$ is then independent of the choice of the geometric structures.

In order to see that $e^{Adams}_{\C}([W\stackrel{q}{\to} B,s])$ is flat we calculate its curvature using \eqref{r23r23r23r23r23r235435346546} and \eqref{lwefwwfewfewfefewfewfewffwe}
$$R(e^{Adams}_{\C}([W\stackrel{q}{\to} B,s]))=\int_{W/B}(\hA(\hat o_{q})-d \widetilde{\hA}(\nabla^{T^{v}q},s))=0\ .$$

}\end{rem}

Let us now assume that $B$ is closed and has a spin structure. Then the projection $\pi:B\to *$ has a $KU$ orientation $o_{\pi}$.
We choose a Riemannian metric $g^{TB}$ on $B$, a vertical metric $g^{T^{v}q}$, and a horizontal distribution $T^{h}q$. The metric $g^{TB}$ lifts to a metric on the horizontal bundle $T^{h}q$ and induces, together with the vertical metric $g^{T^{v}q}$, a metric on $W$. Furthermore, the spin structure of $B$ induces a spin structure on the horizontal bundle, which together with the framing of $T^{v}q$ provides a spin structure on $W$. Note that $\cF^{\perp}_{\R}\cong T^{h}q$. The Levi-Civita connection of $g^{TB}$ pulls back to the connection $\nabla^{\cF^{\perp}_{\R}}$.

\bigskip

We consider a geometric vector bundle $(V,\nabla)$ on $B$.
Then $(\pi^{*}V,\pi^{*}\nabla)$ is a bundle on $W$ and the restriction of $\pi^{*}\nabla$ to $\cF$ is flat, see Example \ref{fjewflewjflewfewf}.

\bigskip

We now assume that $\dim(W)$ is odd.
\begin{lem}
We have
$$\rho(W,\cF^{v},\pi^{*}\nabla,s)=\pi^{o_{\pi}}_{!}(e^{Adams}_{\C}([W\stackrel{q}{\to}B,s])\cup [V])\ .$$
\end{lem}
\proof The geometry on $B$ provides a $\widehat{KU}$-orientation $\hat o_{\pi}$. The geometry on $W$ induces a $\widehat{KU}$-orientation $\hat o_{\pi\circ q}$. In the following calculation we use \cite[Definition 3.22]{MR2664467} and $\hA(\hat o_{\pi})=\hA(\nabla^{\cF_{\R}^{\perp}})$ at the place marked by $!$.
\begin{eqnarray*} \hat o_{\pi}\circ \hat o_{s}&=&\hat o_{\pi}\circ (\hat o-\widetilde{\hA}(\nabla^{T^{v}q},s))\\&=&\hat o_{\pi}\circ \hat o-\hA(\hat o_{\pi})\wedge \widetilde{\hA}(\nabla^{T^{v}q},s)\\&\stackrel{!}{=}&\hat o_{\pi\circ q}-\hA(\nabla^{\cF_{\R}^{\perp}})\wedge \widetilde{\hA}(\nabla^{T^{v}q},s)-\widetilde{\hA}(\nabla^{LC} , \nabla^{T^{v}q}\oplus \nabla^{\cF_{\R}^{\perp}} )\\&=&\hat o_{\pi\circ q}-\widetilde{\hA}(\nabla^{T^{v}q}\oplus \nabla^{\cF_{\R}^{\perp}}, \nabla^{triv,s}\oplus \nabla^{\cF^{\perp}_{\R}})-
\widetilde{\hA}(\nabla^{LC} , \nabla^{T^{v}q}\oplus \nabla^{\cF_{\R}^{\perp}} ) \\&=&\hat o_{\pi\circ q}-
\widetilde{\hA}(LC,s)\ .\end{eqnarray*}
We now use that integration is compatible with the identification
$KU\C/\Z^{*-1}\cong \widehat{KU}^{*}_{flat}$.
We get
\begin{eqnarray*}
\pi^{o_{\pi}}_{!}(e^{Adams}_{\C}([W\to B,s])\cup [V])&=&
\pi^{\hat o_{\pi}}_{!}(e^{Adams}_{\C}([W\to B,s])\cup [V,\nabla])
\\&\stackrel{\eqref{ewfwfewfwfwf23434}}{=}&
\pi^{\hat o_{\pi}}_{!}(q_{!}^{\hat o_{s}}(\pi^{*}[V,\nabla]))\\&\stackrel{\eqref{e23e23e3e23e32e32e2}}{=}&(\pi\circ q)_{!}^{\hat o_{\pi}\circ \hat o_{s}}(\pi^{*}[V,\nabla])\\
&=&(\pi\circ q)_{!}^{\hat o_{\pi\circ q}-
\widetilde{\hA}(LC,s)}(\pi^{*}[V,\nabla])\\
&=&\rho(W,\cF^{v},\pi^{*}\nabla,s)\ .
\end{eqnarray*} \hB

\subsection{The dependence on the framing}

Let $s,s^{\prime}$ be two stable framings of a foliation $\cF_{\R}$. Then we get two connections
$\nabla^{s}$ and $\nabla^{s^{\prime}}$ on $\cF_{\R}\oplus \R^{n}$. Since these connections are flat, by \eqref{1e1h2ekj12ej12ke21hek2eh} we get a cohomology class
$$\widetilde{\hA}(\nabla^{s^{\prime}},\nabla^{s})\in H^{-1}(DD^{per}(M))\ .$$

\begin{ddd}
For every class $u\in KU^{0}(M)$ we define the relative $e$-invariant of the pair
$(s^{\prime},s)$ of stable framings of $\cF_{\R}$ by $$e_u(s^{\prime},s)=[\int_{M}\widetilde{\hA}(\nabla^{s^{\prime}},\nabla^{s})\cup \ch(u)]\in \C/\Z\ .$$
\end{ddd}

\begin{rem}{\rm If $\cF=\cF_{max}$, then
$$e_{1}(s^{\prime},s)=e^{Adams}_{\C}([M,s^{\prime}])-e^{Adams}_{\C}([M,s])\ .$$
In this case $e_{1}(s^{\prime},s)$ takes values in the well-known finite subgroup $\im(e^{Adams}_{\C})\subseteq \C/\Z$ calculated by Adams.
}
\end{rem}

The proof of the following proposition is a straightforward calculation.
\begin{prop}
We adopt the assumptions of Definition \ref{flwefjwefewff} and assume that $s,s^{\prime}$ are stable framings of $\cF_{\R}$.
Then we have
$$\rho(M,\cF,\nabla^{I},s^{\prime})-\rho(M,\cF,\nabla^{I},s)=e_{[V]}(s^{\prime},s)\ .$$
\end{prop}

\subsection{Real and imaginary parts}

\subsubsection{The decomposition}
In this subsection we discuss the components $\rho(\dots)^{\R/\Z}$ and $\rho(\dots)^{i\R}$ of
$\rho(M,\cF,\nabla^{I},s)$ associated to the decomposition of the target group $$\C/\Z\cong \R/\Z\oplus i\R\ , \quad x=x^{\R/\Z}+x^{i\R}$$ into the real and the imaginary parts.

\bigskip

We adopt the assumptions made in Definition \ref{flwefjwefewff}. In addition we choose a hermitean metric $h^{V}$ on the complex vector bundle $V$. Then we can define the adjoint connection $\nabla^{*}$ of $\nabla$ (see Remark \ref{dkjqwlkdjqwlkjdlwqkdwdwqdqwd}) and its unitarization $$\nabla^{u}:=\frac{1}{2}(\nabla+\nabla^{*})$$ with respect to $h^{V}$.

\bigskip

We use \eqref{kdqklwdjqwlkdjqlwdkwqd89798} in order to write \begin{equation}\label{gdhqgdhjgwqdjhgqwd7987}
[V,\nabla]=[V,\nabla^{u}]+a(\widetilde{\ch}(\nabla,\nabla^{u}))\ .
\end{equation}
Then we calculate
\begin{eqnarray}\lefteqn{\rho(M,\cF,\nabla^{I},s)}&&\nonumber\\&\stackrel{\eqref{kdklqwjdlqwjdwldqwdqwdqd}}{=}&\pi^{\hat o}_{!}([V,\nabla])-[\int_{M} \widetilde{\hA}(LS,s)\wedge \ch(\nabla)]_{\C/\Z}\nonumber\\&\stackrel{\eqref{gdhqgdhjgwqdjhgqwd7987}}{=}&\pi^{\hat o}_{!}([V,\nabla^{u}])+[\int_{M} \hA(\nabla^{LC})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z} - [\int_{M} \widetilde{\hA}(LS,s)\wedge \ch(\nabla)]_{\C/\Z}\nonumber\\
&\stackrel{\eqref{hfjwkjehfkjwehfewfewf897987}}{=}&\pi^{\hat o}_{!}([V,\nabla^{u}])+[\int_{M} \hA(\nabla^{LC})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z}\nonumber\\&& - [ \int_{M} \widetilde{\hA}(LS,s)\wedge (\ch(\nabla^{u})+d\widetilde{\ch}(\nabla,\nabla^{u})) ]_{\C/\Z}\nonumber\\&=&
\pi^{\hat o}_{!}([V,\nabla^{u}])-[\int_{M} \widetilde{\hA}(LS,s)\wedge \ch(\nabla^{u}) ]_{\C/\Z} +[\int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z} \label{dlkqwdqwdqwdqwdwqdqw}\end{eqnarray}
using partial integration and Stokes' theorem in the last step.

The first two summands in \eqref{dlkqwdqwdqwdqwdwqdqw} are real. The following is the decomposition of the transgression Chern form into the real and imaginary part (we use \eqref{fewfwefwefwefew32434234234} and \eqref{qwdqwdqwdwqdqwdwqdqwdqwd}):
$$ \widetilde{\ch}(\nabla,\nabla^{u})=\frac{ \widetilde{\ch}(\nabla,\nabla^{u})+ \widetilde{\ch}(\nabla^{*},\nabla^{u})}{2}+\frac{\widetilde{\ch}(\nabla,\nabla^{*})}{2}\ .$$
We get
\begin{eqnarray}
\rho(M,\cF,\nabla^{I},s)^{\R/\Z}&=&\pi^{\hat o}_{!}([V,\nabla^{u}])-[\int_{M} \widetilde{\hA}(LS,s)\wedge \ch(\nabla^{u}) ]_{\R/\Z} \\&&+[\int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \frac{ \widetilde{\ch}(\nabla,\nabla^{u})+ \widetilde{\ch}(\nabla^{*},\nabla^{u})}{2}]_{\R/\Z}\nonumber\\[1cm]
\rho(M,\cF,\nabla^{I},s)^{i\R} &=& \int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \frac{\widetilde{\ch}(\nabla,\nabla^{*})}{2} \label{fjilfewfwefffef}
\ .\end{eqnarray}

\subsubsection{The imaginary part}

We see that the imaginary part $\rho(M,\cF,\nabla^{I},s)^{i\R}$ is just a characteristic number which can be calculated as an integral over locally computable quantities. It does not depend on the framing.

\begin{ex}{\rm We assume that $\nabla^{I}$ is unitary with respect to the metric $h$. Then we can take for $\nabla$ the unitary extension constructed in Lemma
\ref{dhqwkdqkwddqwdwqdioipopioipoopi}. With this choice we have $\nabla= \nabla^{u}$.
\begin{kor}
If $\nabla$ is the unitary extension of $\nabla^{I}$, then $$\rho(M,\cF,\nabla^{I},s)^{i\R}=0\ .$$
In particular, if $2\mathrm{codim}(\cF)<\dim(M)$ and $\nabla^{I}$ is unitary, then $\rho(M,\cF,\nabla^{I},s)^{i\R}=0$. \end{kor}\proof The first assertion follows from \eqref{fjilfewfwefffef}, $\nabla=\nabla^{*}$ and the second equality in \eqref{fewfwefwefwefew32434234234}. The second assertion is then a consequence of the first and Corollary \ref{djwqdlqwjdkwqdwqdqd}. \hB
}\end{ex}

\bigskip

\begin{ex}{\rm For example, if $\pi:\tilde M\to M$ is a finite covering of degree $[\tilde M:M]\in \nat$, then we have the identity
\begin{equation}\label{dwddwqqdqwd342423424324dasd}
\rho(\tilde M,\pi^{*}\cF,\pi^{*}\nabla^{I},\pi^{*}s)^{i\R}=[\tilde M:M] \rho(M,\cF,\nabla^{I},s)^{i\R}\ .
\end{equation}
}\end{ex}

\bigskip

Given a foliated manifold $(M,\cF)$ we have an associated bundle $\cF^{\perp}$ with a flat partial connection $\nabla^{I,\cF}$. If we apply $\rho(\dots)^{i\R}$ to $(V,\nabla^{I})=(\cF^{\perp},\nabla^{I,\cF})$ or a bundle obtained from this by some operation of tensor calculus we get an invariant of the foliation $(M,\cF)$.

\begin{ex}\label{fkllwefwefewf}{\rm In this example, for even $n\in \Z$, we consider a $2n+1$-dimensional closed oriented manifold $M$ with a real foliation $\cF$ of codimension $1$. We assume that $\cF_{\R}$ is co-oriented. Furthermore, we assume that $TM$ has a stable framing $s_{M}$ and a Riemannian metric $g^{TM}$.
The co-orientation of $\cF_{\R}$ induces a framing $s^{\perp}$ of $\cF^{\perp}_{\R}$ by the positive normal unit vector field $N$.
There is then a unique stable framing $s$ of $\cF_{\R}$ such that $s\oplus s^{\perp}\sim s_{M}$.

For $(V,\nabla^{I})$ we take $(\cF^{\perp},\nabla^{I,\cF^{\perp}})$. Let $\mathbf{GV}_{2n+1}(\cF)\in H^{2n+1}(M;\R)$ be the Godbillon-Vey class of the foliation.

\begin{lem}
We have
$$\rho(M,\cF,\nabla^{I},s)^{i\R}=\frac{(-1)^{n+1}}{(2\pi i)^{n+1} n!}\int_{M} \mathbf{GV}_{2n+1}(\cF)\ .$$
\end{lem}
\proof Since $\dim(\cF_{\R}^{\perp})=1$ we have $ \hA_{4p}(\nabla^{\cF^{\perp}_{\R}})\in F^{2p}\Omega^{4p}(M,\cF)=0$ for all $p\ge 1$.
Hence \eqref{fjilfewfwefffef} specializes to
$$\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)^{i\R}=\frac{1}{2}\int_{M} \widetilde{\ch}_{2n+2}(\nabla,\nabla^{*})\ .$$
So we must identify $\widetilde{\ch}_{2n+2}(\nabla,\nabla^{*})$ with a multiple of $\mathbf{GV}_{2n+1}(\cF)$.

\bigskip

We first recall the definition of the Godbillon-Vey class $\mathbf{GV}_{2n+1}(\cF)$.
Since $\cF_{\R}$ is co-oriented there exists a real nowhere vanishing one-form $\kappa\in \Omega^{1}(M)$ such that
$\cF_{\R}=\ker(\kappa)$. Integrability of $\cF_{\R}$ translates to the relation $\kappa\wedge d\kappa=0$. We can choose a real $1$-form
$\omega\in \Omega^{1}(M)$ such that $d\kappa=\kappa\wedge \omega$.
Note that $ \omega$ is unique up to multiples of $\kappa$. Then the form $\omega\wedge (d\omega)^{n}\in \Omega^{2n+1}(M)$ is closed and represents the Godbillon-Vey class $\mathbf{GV}_{2n+1}(\cF)$.

\bigskip

Using the unit normal vector field $N\in \Gamma(M,TM)$
we can normalize $\kappa$ such that $ \kappa(N)=1$. Let $\omega$ be as above. We take $\omega$ as a connection one-form for a connection $\nabla$ on $\cF^{\perp}_{\R}$ with respect to the trivialization by $N$.
For a section $X$ of $TM$ we have by definition
$$\nabla_{X}N=\omega(X)N\ .$$
On the other hand, if $X$ is a section of $\cF$, then we have by Cartan's formula
$$\omega(X)=(\kappa\wedge \omega)(N,X)=d\kappa(N,X)= N\kappa(X)-X\kappa(N)-\kappa([N,X])=\kappa([X,N])\ .$$
In view of the description of $\nabla^{I,\cF^{\perp}_{\R}}$ given in Example \ref{fklwefjwefewf} this implies that the connection $\nabla$ extends the flat partial connection $\nabla^{I,\cF^{\perp}_{\R}}$.

\bigskip

We have
$$\frac{(-1)^{n+1}}{(2\pi i)^{n+1} n!}\omega\wedge (d\omega)^{n}= \widetilde{\ch}_{2n+2}(\nabla,\nabla^{triv})\ .$$
Similarly,
$$(-1)^{n+1}\frac{(-1)^{n+1}}{(2\pi i)^{n+1} n!}\omega\wedge (d\omega)^{n}= \widetilde{\ch}_{2n+2}(\nabla^{*},\nabla^{triv})\ .$$
Hence, if $n$ is even, then by taking the difference of these two equations we get
$$\frac{2(-1)^{n+1}}{(2\pi i)^{n+1} n!}\mathbf{GV}_{2n+1}(\cF)= \widetilde{\ch}_{2n+2}(\nabla ,\nabla^{*})\ .$$

\hB
}
\end{ex}

\begin{rem}\label{ergegojerglerogergeg}{\rm As noted above we can take $(V,\nabla^{I}):=(\cF^{\perp},\nabla^{I, \cF^{\perp}})$
in order to define an invariant which only depends on the foliation $\cF$.
In this remark we assume that $\cF$ is real and that
$\nabla^{\cF^\perp}$ is the complexification of a connection $\nabla^{\cF_{\R}^{\perp}}$ extending $\nabla^{I,\cF^{\perp}_{\R}}.$ We choose in addition a metric $h^{\cF^{\perp}_{\R}}$ in order to define the adjoint $\nabla^{\cF^{\perp},*} $.
In this remark we explain the place of $$\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)^{i\R}= \int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \frac{\widetilde{\ch}(\nabla^{\cF^{\perp}},\nabla^{\cF^{\perp},*})}{2}$$ in the classification of foliation invariants defined in terms of secondary characteristic classes of foliations.

\bigskip

We start with the classification of characteristic forms for foliations of codimension $q\in \nat$ \cite{MR0307250}, see also \cite{MR512428}. Let
$q^{\prime}\in \nat$ be the largest odd integer $\le q$.
One defines the commutative graded algebra
$$WO_{q}:=\R[\tilde c_{1},\dots,\tilde c_{q^{\prime}}]\otimes \R[c_{1},\dots,c_{q}]^{\le 2q}\ ,$$
where the degrees of the generators are given by $$|\tilde c_{i}|=2i-1\ , \quad \mbox{$i$ odd, $\quad \quad $ and }\quad \quad \quad |c_{i}|=2i$$
and the superscript $[-]^{\le 2q}$ indicates that we take only polynomials of degree at most $2q$.

On this ring we consider the differential $d$ given by
$$d\tilde c_{i}:=c_{i}\ ,\quad dc_{i}=0\ .$$
The cohomology $H^{*}(WO_{q})$ of this DGA classifies secondary characteristic classes for foliations of codimension $q$.
For a cohomology class $[U]\in H^{*}(WO_{q})$ we let $\Delta([U])\in H^{*}(M;\R)$ denote the corresponding cohomology class.

\bigskip

In the following we describe $\Delta$ on the form level. Since $\nabla^{\cF^{\perp}}$ and $\nabla^{\cF^{\perp},*}$ are complexifications of connections which are dual to each other on a real bundle we have
$$\ch_{2n}(\nabla^{\cF^{\perp},*})=(-1)^{n} \ch_{2n}(\nabla^{\cF^{\perp}})\ .$$
By \eqref{hfjwkjehfkjwehfewfewf897987}
we get for odd $n$
$$d \frac{1}{2i^{n}}\widetilde{\ch}_{2n}(\nabla^{\cF^{\perp}}, \nabla^{\cF^{\perp},*})=\frac{1}{i^{n}}\ch_{2n}(\nabla^{\cF^{\perp}})\ .$$
Therefore the connection $\nabla^{\cF^{\perp}}$ together with a choice of a metric $h^{\cF^{\perp}_{\R}}$ induces a map of commutative differential graded algebras
$$\Delta_{(\nabla^{\cF^{\perp}},h^{\cF^{\perp}_{\R}})}:WO_{q}\to \Omega(M)\ ,$$ by $$\Delta_{( \nabla^{\cF^{\perp}} ,h^{\cF^{\perp}_{\R}})}(\tilde c_{n}):=\frac{1}{2i^{n}}\widetilde{\ch}_{2n}(\nabla^{\cF^{\perp}}, \nabla^{\cF^{\perp},*})\ , \quad \Delta_{( \nabla^{\cF^{\perp}} ,h^{\cF^{\perp}_{\R}})} (c_{n}):=\frac{1}{i^{n}}\ch_{2n}(\nabla^{\cF^{\perp}})\ .$$

Then for $[U]\in H^{*}(WO_{q})$ the characteristic class $\Delta([U])\in H^{*}(M;\R)$ of the foliation $\cF$ is given by
\begin{equation}\label{qwdqwdwqdqwdqwdqwd2312343534tfwrvwfvwfwef}
\Delta([U]):=[\Delta_{( \nabla^{\cF^{\perp}} ,h^{\cF^{\perp}_{\R}})}(U)] \ .
\end{equation}

\bigskip

There is a universal polynomial
$ A(c_{1},\dots,c_{q})\in \R[c_{1},\dots,c_{q}]^{\le 2q}$ such that
$$\hA(\nabla^{\cF^{\perp}_{\R}})^{\le 2q}= A(\ch_{2}(\nabla^{\cF^{\perp}}),\dots,\ch_{2q}(\nabla^{\cF^{\perp}}))\ .$$
We consider \begin{equation}\label{fwefwefewfewfewfwefew53453453455}
U:=\left[\left(\sum_{i=1, odd}^{q^{\prime}} (-1)^{\frac{i+1}{2}} \tilde c_{i}\right) A(c_{1},\dots,c_{q}) \right]_{\dim(M)}\in WO^{\dim(M)}_{q}\ .
\end{equation}

If $2q<\dim(M)$, then $U$ is a cycle. \begin{lem} Let $\cF$ be a real foliation of codimension $q$ such that $2q<\dim(M)$. Then the class $$[U]\in H^{\dim(M)}(WO_{q})$$
is the universal class classifying the imaginary part of $\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)$.
\end{lem}
\proof The relation
$$\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)^{i\R} =i\langle \Delta([U]),[M]\rangle$$ follows immediately from \eqref{fwefwefewfewfewfwefew53453453455}, the definition \eqref{qwdqwdwqdqwdqwdqwd2312343534tfwrvwfvwfwef} of $\Delta([U])$ and \eqref{fjilfewfwefffef}.
\hB

Let us assume that $p$ is odd and $2p-1> q$. Then $d\tilde c_{p}=0$ and we have the cohomology class $[\tilde c_{p}]\in H^{2p-1}(WO_{q})$.
If the foliation $\cF$ is real, then the characteristic class
\eqref{kfkwejwlkefjlwekfjewfewfopipoi234} is given by \begin{equation}\label{ewfwefewfwf432342344123}
[c_{2p-1}(\nabla^{I,\cF^{\perp}})]=2i^{p}\Delta[\tilde c_{p}]\ .
\end{equation}

}
\end{rem}

\subsubsection{The real part}

The real part $\rho(M,\cF,\nabla^{I},s)^{\R/\Z}$ is more complicated and of global nature.
A good case to look at is discussed in Example \ref{kdljlqwdqwdqwd}.

\begin{ex}{\rm The following example shows that $\rho(M,\cF,\nabla^{I},s)$ is not an integral over $M$ of locally determined quantities.
We consider the manifold $M:=S^{1}$ with the maximal foliation $\cF_{max}=T_{\C}S^{1}$. The framing $s$ of $TS^{1}$ is the bounding framing so that $[S^{1},s]=0$ in $\Omega_{1}^{fr}$. Furthermore we let $\bV(r):=(V,h,\nabla(r))$
be the flat line bundle with holonomy $\exp(2\pi i r)$ for $r\in [0,1)$. Then we can apply \eqref{mkmxlkmlqwxqwx} and \eqref{jhdjkehdkwwedewd} and get
$$\rho(S^{1},\cF_{max},\nabla(r),s)=\rho(\nabla(r))=\xi(\slashed{D}\otimes \bV(r))-\xi(\slashed{D})\ .$$
In this case the reduced $\eta$-invariant can be calculated explicitly. The result is
$$\xi(\slashed{D}\otimes \bV(r))=[-r]_{\C/\Z}\ .$$
Hence we get
$$\rho(S^{1},\cF_{max},\nabla(r),s) =[-r]_{\C/\Z}\ .$$
In particular, our invariant depends non-trivially on $r$.
The data $(S^{1},\cF_{max},\nabla(r),s)$ for different $r$ are locally isomorphic.

\bigskip

Note that in this example the analog of \eqref{dwddwqqdqwd342423424324dasd} nevertheless holds true.
}
\end{ex}

\section{Factorization over algebraic $K$-theory of smooth functions}\label{keklwfewfewfewf}

Let $P$ be a closed $p$-dimensional manifold and $s$ be a stable framing of $TP$. For a manifold $X$
we consider a product of foliated manifolds \begin{equation}\label{gergergergregegrgerg3453435}
(M,\cF):=(P\times X,T_{\C}P\boxplus 0)=(P,\cF_{max})\times (X,\cF_{min})
\end{equation} and a pair $(V,\nabla^{I})$ of a complex vector bundle and a flat partial connection on $(M,\cF)$. We will show that the data represents an algebraic $K$-theory class $$f^{o_{s}}_{!}(
[V,\nabla^{I}]^{alg})\in K_{p}(C^{\infty}(X))$$ of the ring $C^{\infty}(X)$.
If we assume that $X$ is closed, spin and that $\dim(X)<p$, then our main result is the equality $$\rho(M,\cF,\nabla^{I},s)=\pi_{!}^{o}({\tt reg}_{X}(f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})))\ ,$$
where $${\tt reg}_{X}:K_{p}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-p-1}(X)$$ is the regulator and the map $$\pi:X\to *$$ has the ${\mathbf{ku}}$-orientation from the spin structure of $X$.

\subsection{Statement of the result}

For manifolds $X$ and $P$ we consider the foliated manifold \eqref{gergergergregegrgerg3453435}.
From the point of view of foliation theory it is very trivial. The leaves of the foliation on the product $M=P\times X$ are just the submanifolds $P\times \{x\}$ for all $x\in X$.
\bigskip

We assume that $P$ is closed and that the tangent bundle $TP$ of $P$ is equipped with a stable framing $s$. The framing $s$ induces an orientation $o_{s}$ of the map $f:P\to *$ for the stable cohomotopy theory, the cohomology theory represented by the sphere spectrum $\bS$ (or equivalently, the framed bordism theory).
Any spectrum $\bE$ is a module spectrum over $\bS$. Consequently $f$ has an induced orientation for the cohomology theory $\bE^{*}$ which we denote by the same symbol $o_{s}$.
We have an Umkehr or integration map between cohomology groups
$$ f_{!}^{o_{s}}:\bE^{*}(P)\to \bE^{*-p}(*)\ ,$$
where $p:=\dim(P)$.
We will apply this to the cohomology theory ${\mathbf{K}}(C^{\infty}(X))^{*}$ represented by the connective algebraic $K$-theory spectrum ${\mathbf{K}}(C^{\infty}(X))$ of the ring of complex-valued smooth functions on the manifold $X$.

\bigskip

We start with the class $$[V,\nabla^{I}]^{alg}\in {\mathbf{K}}(C^{\infty}(X))^{0}(P) $$ (see Definition \ref{xgrergegg} for a technical description) represented by a pair $(V,\nabla^{I})$ of a complex vector bundle on the foliated manifold \eqref{gergergergregegrgerg3453435} and a flat partial connection.
We can form the algebraic $K$-theory class \begin{equation}\label{ghjqwdgjqwdwqdwqdqdqdqd}
f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})\in {\mathbf{K}}(C^{\infty}(X))^{-\dim(P)}(*)=K_{\dim(P)}(C^{\infty}(X))\ .
\end{equation}

We now assume that $X$ is closed and spin. We further assume that $\dim(P)+\dim(X)$ is odd and that $\dim(X)<\dim(P)$, or equivalently, $$2\mathrm{codim}(\cF)<\dim(M)\ .$$ Then by Corollary \ref{djwqdlqwjdkwqdwqdqd} the invariant $\rho(M,\cF,\nabla^{I},s)\in \C/\Z$ is well-defined and independent of additional geometric choices.

The main result of the present section shows that $\rho(M,\cF,\nabla^{I},s)$ can be expressed in terms of the class
\eqref{ghjqwdgjqwdwqdwqdqdqdqd}.
In greater detail, for every $n\in \nat$ with $n> \dim(X)$ we will construct, using methods from differential cohomology theory, a natural regulator
$${\tt reg}_{X}:K_{n}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-n-1}(X)\ ,$$
see Definition \ref{klfwefewfewfwf}.
Let $\pi:X\to *$ be the projection. The spin structure on $X$ induces an orientation $o$ for the periodic complex topological $K$-theory $\mathbf{KU}$, and hence for the $\mathbf{KU}$-modules ${\mathbf{ku}}$ and ${\mathbf{ku}}\C/\Z$.
We use the isomorphisms
$${\mathbf{ku}}^{k}(*)\cong \left\{\begin{array}{cc}\Z&k\in -2\nat\\ 0&else\end{array}\right.\ , \quad {\mathbf{ku}}\C/\Z^{k}(*)\cong \left\{\begin{array}{cc}\C/\Z&k\in -2\nat\\ 0&else\end{array}\right.$$
in order to interpret elements in ${\mathbf{ku}}\C/\Z^{2*}(*)$ (e.g. the left-hand side of \eqref{wefwefefewfwfewfwewefwefwef}) as elements of $\C/\Z$.
\begin{theorem}\label{flkfefwefwefewfef} We have the relation
\begin{equation}\label{wefwefefewfwfewfwewefwefwef}\pi^{o}_{!}({\tt reg}_{X}(f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})))=\rho(M,\cF,\nabla^{I},s)\ .\end{equation}
\end{theorem}
The proof of this Theorem will be finished in Subsection \ref{fwklfwfewfewfewf}.

\begin{rem}{\rm Every class $x\in K_{*}(C^{\infty}(X))$ can be presented in the form \eqref{ghjqwdgjqwdwqdwqdqdqdqd} for suitable stably framed manifolds $P$ and pairs $(V,\nabla^{I})$. Indeed, the class $x$ can be thought of as being represented by a map $x:S^{n}\to BGL(C^{\infty}(X) )^{+}$, where we consider $GL(C^{\infty}(X) )$ as a discrete group and $+$ stands for Quillen's $+$-construction. Using the standard stable framing $s_{can}$ of $S^{n}$ the triple $(S^{n},x,s_{can})$ represents a framed bordism class
$[S^{n},x,s_{can}]\in \Omega^{fr}_{n} (BGL(C^{\infty}(X) )^{+})$. Since the $+$-construction map
$$p:BGL(C^{\infty}(X) )\to BGL(C^{\infty}(X) )^{+}$$ induces an isomorphism in generalized homology theories there exists a unique class
$[P,y,s]\in \Omega^{fr}_{n}(BGL(C^{\infty}(X) ))$ such that $p_{*}([P,y,s])=[S^{n},x,s_{can}]$. Since $P$ is compact, there exists a factorization of $y$ as
$$P\stackrel{\tilde y}{\to} BGL(N,C^{\infty}(X) )\to BGL(C^{\infty}(X) )$$ for a suitable $N\in \nat$. The map $\tilde y$
classifies a pair $(V,\nabla^{I})$ over $P\times X$ of an $N$-dimensional complex vector bundle with a flat partial connection in the $P$-direction. We then have
$$f_{!}^{o_{s}}([V,\nabla^{I}]^{alg})=x\ .$$ \hB
}
\end{rem}

\subsection{Algebraic $K$-theory sheaves}
\label{klfwjlefewfewf}

We consider the site $\Mf_{\C-fol}$ of pairs $(M,\cF)$ of manifolds $M$ with a foliation $\cF$ and foliated maps (see Section \ref{fewl453534535435} for definitions).
The topology is given by open coverings. We have a morphism of sites \begin{equation}\label{ewfwfwefewfewfwfwefwfe}
\Mf_{\C-fol}\to \Mf
\end{equation} which forgets the foliations.

In the following we work in the framework of $\infty$-, or more precisely, of $(\infty,1)$-categories developed by Joyal, Lurie and others \cite{HTT}, \cite{HA}. We refer to \cite[Sec. 2.1]{Bunke:2014aa}, \cite[Sec. 2]{2013arXiv1311.3188B}
and \cite[Sec.4]{2012arXiv1208.3961B} for an introduction to the language as we will use it here and for further references. We will not discuss the size issues. They can be solved in the standard way for the examples used in the present paper.

\bigskip

For a presentable $\infty$-category $\bC$ and a site $\mathbf{M}$ we consider the category
${\mathbf{PSh}}_{\bC}(\mathbf{M})$ of $\bC$-valued presheaves and its full subcategory of sheaves $\Sh_{\bC}(\mathbf{M})$.
They are related by an adjunction \begin{equation}\label{dqdqwdwqdwqdwqdwqdqd}
L:{\mathbf{PSh}}_{\bC}(\mathbf{M})\leftrightarrows\Sh_{\bC}(\mathbf{M}):inclusion\ ,
\end{equation}

where $L$ is called the sheafification.
\bigskip

We consider the $1$-category of categories ${\mathbf{Cat}}$ with its cartesian symmetric monoidal structure.
For the class $W$ of categorical equivalences we form the symmetric monoidal $\infty$-category
${\mathbf{Cat}}[W^{-1}]$. By ${\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])$ we denote the category of commutative algebras in ${\mathbf{Cat}}[W^{-1}]$.
\begin{rem}\label{dkwqnmqlkwdwqdwqdqd}{\rm A commutative monoid can be considered as a symmetric monoidal category with only unit morphisms.
It is an object of ${\mathbf{CAlg}}({\mathbf{Cat}})$ and therefore represents one in ${\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])$. A general symmetric monoidal category has non-identity associator and commutativity constraints and is therefore not a commutative algebra in ${\mathbf{Cat}}$. But it naturally represents an object in ${\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])$.

}
\end{rem}
The objects of ${\mathbf{PSh}}_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])} (\mathbf{M})$ are called symmetric monoidal prestacks. Similarly, objects in $\Sh_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])}(\mathbf{M})$ are called symmetric monoidal stacks.

\bigskip

We consider the following four symmetric monoidal stacks on $\Mf$ or $\Mf_{\C-fol}$ of vector bundles with additional structures. The monoidal structure is always given by the direct sum.

\begin{enumerate}
\item For a manifold $M$
we let ${\mathbf{Vect}} (M )$ denote the category of vector bundles $V\to M$. A map $f:M^{\prime}\to M$ induces a functor $f^{*}:{\mathbf{Vect}} (M )\to {\mathbf{Vect}} (M^{\prime} )$. We get a stack ${\mathbf{Vect}} $ on the site $\Mf $ with respect to the topology of open coverings.
We use the same symbol for its pull-back to the site $\Mf_{\C-fol}$ along \eqref{ewfwfwefewfewfwfwefwfe}.
\item We let ${\mathbf{Vect}}^{\nabla} (M)$ denote the category of pairs $(V,\nabla)$ of a vector bundle $V\to M$ and a connection. A map $f: M^{\prime} \to M $ induces a functor $f^{*}:{\mathbf{Vect}}^{\nabla} (M )\to {\mathbf{Vect}}^{\nabla} (M^{\prime} )$. We get a symmetric monoidal stack ${\mathbf{Vect}}^{\nabla} $ on the site $\Mf $. We use the same symbol for its pull-back to the site $\Mf_{\C-fol}$ along \eqref{ewfwfwefewfewfwfwefwfe}.
\item For a foliated manifold $(M,\cF)$ we let ${\mathbf{Vect}}^{\flat}(M,\cF)$ denote the category of pairs $(V,\nabla^{I})$ of a vector bundle $V\to M$ and a flat partial connection $\nabla^{I}$ on $V$, see Section \ref{fhfjlwefkjfewfewfewfwf}. A foliated map $f:(M,\cF)\to (M^{\prime},\cF^{\prime})$ induces a functor $f^{*}:{\mathbf{Vect}}^{\flat}(M^{\prime},\cF^{\prime})\to {\mathbf{Vect}}^{\flat}(M,\cF)$. We get a stack ${\mathbf{Vect}}^{\flat}$ on the site $\Mf_{\C-fol}$.
\item We let ${\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$ denote the category of pairs $(V,\nabla)$ of a vector bundle $V\to M$ and a connection $\nabla$ on $V$ which is flat in the direction of the foliation. A foliated map $f$ as above induces a functor $f^{*}:{\mathbf{Vect}}^{\flat,\nabla}(M^{\prime},\cF^{\prime})\to {\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$. We get a symmetric monoidal stack ${\mathbf{Vect}}^{\flat,\nabla}$ on the site $\Mf_{\C-fol}$.
\end{enumerate}

There is a commutative diagram of forgetful maps in $\Sh_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])} (\Mf_{\C-fol})$: \begin{equation}\label{wsdqwdwqdqwdwqdsq} \xymatrix{{\mathbf{Vect}}^{\flat,\nabla}\ar[d]\ar[r]&{\mathbf{Vect}}^{\flat}\ar[d]\\{\mathbf{Vect}}^{\nabla}\ar[r]&{\mathbf{Vect}}}\end{equation}
We now apply the $K$-theory machine $\mathcal{K}$ (see \cite[Def. 6.1]{2013arXiv1311.3188B} and Remark \ref{flwjfklefewfwfewfwfw234244}) and get a commutative diagram of presheaves of spectra \begin{equation}\label{f4fwefwefewfwf}
\xymatrix{\mathcal{K}({\mathbf{Vect}}^{\flat,\nabla}) \ar[d]\ar[r]&\mathcal{K}({\mathbf{Vect}}^{\flat}) \ar[d]\\\mathcal{K}({\mathbf{Vect}}^{\nabla})\ar[r]\ar[d]&\mathcal{K}({\mathbf{Vect}})\ar[d]\\ \hat {\mathbf{ku}}^{\nabla}\ar[r]& \hat {\mathbf{ku}} }
\end{equation}
in ${\mathbf{PSh}}_{\Sp}(\Mf_{\C-fol})$.
The upper square in \eqref{f4fwefwefewfwf} is by definition the image of \eqref{wsdqwdwqdqwdwqdsq} under $\mathcal{K}$.
The lower horizontal map is defined by applying the sheafification $L$ (see \eqref{dqdqwdwqdwqdwqdwqdqd}) to the middle horizontal arrow and the lower vertical arrows are the units of the sheafification.
In particular, we use the notation \begin{equation}\label{wqdwqddq21321}
\hat {\mathbf{ku}}^{\nabla}:=L(\mathcal{K}({\mathbf{Vect}}^{\nabla}))\ , \quad \hat {\mathbf{ku}}:=L(\mathcal{K}({\mathbf{Vect}}))\ .
\end{equation}

\begin{rem}\label{flwjfklefewfwfewfwfw234244}{\rm For the sake of the reader let us indicate some details on the $K$-theory machine $\mathcal{K}$. It is the composition \begin{eqnarray*}&&
{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])\to {\mathbf{CAlg}}(\mathbf{Groupoids}[W^{-1}])\to {\mathbf{CommMon}}(\sSet[W^{-1}])\\&&\hspace{4cm}\to {\mathbf{CommGroup}}(\sSet[W^{-1}])\simeq \Sp_{\ge 0}\to \Sp\ .\end{eqnarray*}

of the following functorial constructions:
\begin{enumerate}
\item We first take the underlying symmetric monoidal groupoid.
\item Then we apply the nerve in order to get a commutative monoid in the category of spaces $\sSet[W^{-1}]$, i.e. an $E_{\infty}$-space.
\item Then we apply the group completion functor to obtain a commutative group in spaces, i.e. a grouplike $E_{\infty}$-space.
\item Finally we apply the functor which maps a commutative group in spaces to the corresponding connective spectrum whose $\infty$-loop space is this group.
\end{enumerate}}
\end{rem}
\begin{rem}{\rm Note that the symmetric monoidal stacks ${\mathbf{Vect}}^{\nabla}$ and ${\mathbf{Vect}}$ are pulled back from stacks on the site $\Mf$ via the forgetful morphism \eqref{ewfwfwefewfewfwfwefwfe}. The same is true for the associated sheaves of $K$-theory spectra $\hat {\mathbf{ku}}^{\nabla}$ and $\hat {\mathbf{ku}}$. They represent differential versions of connective $K$-theory ${\mathbf{ku}}$ and are studied in detail in \cite[Sec. 6]{2013arXiv1311.3188B}.
}\end{rem}

\subsection{Characteristic cocycles}\label{kfjwelfewfewfewfewfewfewfe}

In order to construct the regulator we use the method introduced in \cite{Bunke:2012fk} based on the notion of characteristic cocycles.
We consider the category of chain complexes $\Ch$.
We have $$DD^{-}, DD^{per}\in \Sh_{ \Ch }(\Mf_{\C-fol})$$
introduced in Definition \ref{jkdjlqwdqwdqwd}, where here we forget the algebra structure.
Using the Chern character forms (Definitions \ref{ffwefwefewfewfwfw} and \ref{qldjqwldqwdqwdqwd}) and their naturality (equations \eqref{wqdqwdqwdwqdwqwqdqd} and \eqref{wqdqwdqwdwqdwqwqdqd1}) we get characteristic cocycles (see \cite[Def. 2.12]{Bunke:2012fk})
$$\ch^{-}:\pi_{0}({\mathbf{Vect}}^{\flat,\nabla})\to Z^{0}(DD^{-}) \ , \quad
\ch:\pi_{0}({\mathbf{Vect}}^{ \nabla})\to Z^{0}(DD^{per}) \ .$$
Here $\pi_{0}$ sends a symmetric monoidal category to its commutative monoid of isomorphism classes.
We will consider commutative monoids as symmetric monoidal categories, see Remark \ref{dkwqnmqlkwdwqdwqdqd}.
The following diagram in ${\mathbf{PSh}}_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])}(\Mf_{\C-fol})$ commutes:
\begin{equation}\label{kjwehbfjkfhekfehwfkewfef09890}
\xymatrix{\pi_{0}({\mathbf{Vect}}^{\flat,\nabla})\ar[r]\ar[d]& Z^{0}(DD^{-})\ar[d]\\\pi_{0}({\mathbf{Vect}}^{ \nabla})\ar[r]&Z^{0}(DD^{per})}\ . \end{equation}

Let $$\mathbf{H}:\Ch[W^{-1}]\to \Sp$$ denote the Eilenberg-MacLane functor (see \cite[(22)]{Bunke:2012fk}). We will use the notation
\begin{equation}\label{ewfewfewfewf444efewfewfewfwef}
\sigma^{\ge p}\mathbf{DD}^{-}:=\mathbf{H}(\sigma^{\ge p }DD^{-})\ , \quad \sigma^{\ge p}\mathbf{DD}^{per}:=\mathbf{H}(\sigma^{\ge p}DD^{per})
\end{equation} for $p\in \Z$.
Note that
$$\sigma^{\ge p}\mathbf{DD}^{-}\ , \sigma^{\ge p}\mathbf{DD}^{per}\in \Sh_{ \Sp}(\Mf_{\C-fol})$$
(see \cite[Sec. 2.3]{Bunke:2014aa}).
Applying the general construction of regulators \cite[Def. 2.14]{Bunke:2012fk}
we get the commuting diagram in ${\mathbf{PSh}}_{\Sp}(\Mf_{\C-fol})$:

\begin{equation}\label{kjwehbfjkfhekfehwfkewfef098901}
\xymatrix{\mathcal{K}({\mathbf{Vect}}^{\flat,\nabla}) \ar[r]^{r(\ch^{-})}\ar[d]& \sigma^{\ge 0}\mathbf{DD}^{-}\ar[d]\\\ \mathcal{K}({\mathbf{Vect}}^{\nabla})\ar[d]^{u}\ar[r] &\sigma^{\ge 0}\mathbf{DD}^{per}\\\hat {\mathbf{ku}}^{\nabla}\ar@{.>}[ur]_{r(\ch)}& }\ .
\end{equation}
In order to get the lower triangle we use that $\sigma^{\ge 0}\mathbf{DD}^{per}$ is a sheaf and the universal property of the unit $u$ of the sheafification.

\

\subsection{The class $[V,\nabla^{I}]^{alg}$}

Let us fix a manifold $X$. We want to consider foliations whose space of leaves is $X$. Trivial foliations of this type are obtained by taking the product of the typical leaf with $X$.
In this way we actually obtain an inclusion of manifolds into foliations. More precisely we consider the functor \begin{equation}\label{dwedewdewdwd}
j_{X}:\Mf \to \Mf_{\C-fol}\ , \quad j_{X}(P):=(P,\cF_{max})\times (X,\cF_{min})\ .
\end{equation} A manifold $Y$ also gives rise to endofunctors
\begin{equation}\label{a1}
i_{Y}:\Mf \to \Mf \ , \quad i_{Y} (P):= Y\times P\ ,
\end{equation} and \begin{equation}\label{a2}
i_{Y}:\Mf_{ \C-fol} \to \Mf_{\C-fol}\ ,\quad i_{Y} (P,\cF):=(Y,\cF_{max})\times (P,\cF)\ .
\end{equation}
\bigskip

The projection $Y\to *$ induces a morphism $\id\to i_{Y}^{*}$.

\bigskip

Let $I:=[0,1]$ denote the unit interval. Let $\bC$ be a presentable $\infty$-category. \begin{ddd}
An object $A\in \Sh_{\bC}(\Mf)$ (or $ {\mathbf{PSh}}_{\bC}(\Mf)$, $ \Sh_{\bC}(\Mf_{\C-fol})$ or $ {\mathbf{PSh}}_{\bC}(\Mf_{\C-fol})$) is called homotopy invariant if the natural morphism
$A\to i_{I}^{*} A$ is an equivalence. \end{ddd}
We indicate the full subcategories of homotopy invariant (pre)sheaves by an upper index $h$.

\bigskip

\begin{ex}{\rm By \cite[Prop. 2.6, 1.]{2013arXiv1311.3188B} (see also Lemma \ref{kfhkwjefewfewf98790234jnbkjf} below), for a homotopy invariant sheaf $\bE\in \Sh^{h}_{\bC}(\Mf)$ we have a natural equivalence
$$\underline{\bE(*)}\simeq \bE\ .$$ If $\bC=\Sp$, then for
$M\in \Mf$ and $k\in \Z$ we have a natural isomorphism of abelian groups
\begin{equation}\label{fwefewfewfewfe324}
\pi_{k}(\bE(M))\cong \bE(*)^{-k}(M)\ .
\end{equation}
Observe that a similar statement is not true for homotopy invariant sheaves on $\Mf_{\C-fol}$.}
\end{ex}

\begin{lem}\label{leelfjwelfwefewf}
The sheaf ${\mathbf{Vect}}^{\flat}$ is homotopy invariant.
\end{lem}
\proof The reason is that the foliation of $i_{I}(M,\cF)=(I\times M,T_{\C}I\boxplus \cF)$ contains the $I$-direction. For $(V,\nabla^{I})\in {\mathbf{Vect}}^{\flat}(i_{I}(M,\cF))$
we can use the flat connection $\nabla^{I}$ in order to define a parallel transport in the $I$-direction.

Hence a vector bundle $(V,\nabla^{I})$ with a flat partial connection or a morphism between two such objects over $I\times M$ is uniquely determined by the restriction to $\{0\}\times M$. \hB

We now use the fact that on the site $\Mf$ sheafification preserves homotopy invariance.
\begin{lem}\label{kfhkwjefewfewf98790234jnbkjf}
If $\mathbf{F}\in {\mathbf{PSh}}^{h}_{\bC}(\Mf)$, then $L(\mathbf{F})\simeq \underline{\mathbf{F}(*)} $. In particular $L(\mathbf{F})\in \Sh^{h}_{\bC}(\Mf)$.
\end{lem}
\proof If ${\mathcal{U}}$ is a good covering of a manifold $M$, then let $U^{\bullet} $ denote the associated simplicial manifold called the \v{C}ech nerve. We get a simplicial set $\pi_{0}(U^{\bullet})\in \sSet$. Applying the localization $\iota:\sSet\to \sSet[W^{-1}]$ we obtain a space
$ \iota \pi_{0}(U^{\bullet})\in \sSet[W^{-1}]$.

Using the inclusion $d:\Set\to \sSet$ of sets as discrete simplicial sets we can consider $d(\pi_{0}(U^{\bullet}))$ as a simplicial object in $\sSet$ which is levelwise discrete. If we apply $\iota$ levelwise, then we get a simplicial space
$\iota(d(\pi_{0}(U^{\bullet})))\in \Fun(\Delta^{op},\sSet[W^{-1}])$. Note that for every simplicial set $X$ we have a natural equivalence
\begin{equation}\label{wefewfwfewfewfwf2342343efdwef3232rd3455678}
\colim_{\Delta^{op}} \iota(d(X))\simeq \iota (X)\ .
\end{equation}
Since ${\mathcal{U}}$ was a good covering we have an equivalence
$$M_{top}\simeq \iota \pi_{0}(U^{\bullet})\stackrel{\eqref{wefewfwfewfewfwf2342343efdwef3232rd3455678}}{\simeq} \colim_{\Delta^{op}} \iota(d(\pi_{0}(U^{\bullet})))\ ,$$
where $M_{top}$ denotes the underlying space of $M$. We use this equivalence at the place marked by $!!$, and the homotopy invariance of $\mathbf{F}$ at $!$ in the following chain of equivalences: $$\lim_{\Delta} \mathbf{F}(U^{\bullet}) \stackrel{!}{\simeq} \lim_{\Delta} \mathbf{F}(*)^{\iota (d( \pi_{0}(U^{\bullet})))} \simeq \mathbf{F}(*)^{ \colim_{\Delta^{op}}\iota (d(\pi_{0}(U^{\bullet})))} \stackrel{!!}{\simeq} \mathbf{F}(*)^{M_{top}}\simeq \underline{\mathbf{F}(*)}(M)
\ .$$
Let ${\mathcal{L}}$ be the \v{C}echification operator (see \cite[Definition A.4]{Bunke:2012fk}).
Since good coverings are cofinal in the system of coverings involved in the definition of ${\mathcal{L}}$
we conclude that
$${\mathcal{L}}(\mathbf{F})(M)\simeq \underline{\mathbf{F}(*)}(M)\ .$$ Hence ${\mathcal{L}}(\mathbf{F})\simeq\underline{\mathbf{F}(*)} $ is a homotopy invariant sheaf. Using the equivalence \cite[(176)]{Bunke:2012fk} we get that
$$L(\mathbf{F})\simeq L({\mathcal{L}}(\mathbf{F}))\simeq {\mathcal{L}}(\mathbf{F})\simeq \underline{\mathbf{F}(*)}\ .$$
\hB

We define $${\mathbf{K}}_{X}:=L(j_{X}^{*} \mathcal{K}({\mathbf{Vect}}^{\flat}))\in \Sh^{h}_{\Sp}(\Mf)\ .$$
Note that $j_{X}^{*}$ preserves homotopy invariance and the sheaf condition. By Lemmas \ref{leelfjwelfwefewf}
and \ref{kfhkwjefewfewf98790234jnbkjf} we see that ${\mathbf{K}}_{X}$ is indeed a homotopy invariant sheaf.

\bigskip

We have a chain of equivalences of symmetric monoidal categories $$j_{X}^{*}{\mathbf{Vect}}^{\flat}(*)\simeq {\mathbf{Vect}}^{\flat}(X,\cF_{min})\simeq {\mathbf{Vect}}(X)\simeq \mathbf{Proj}(C^{\infty}(X))\ ,$$
where the first two are obtained by specializing definitions, and the last is Swan's theorem.
This implies
\begin{equation}\label{fefwefwefewf234234sdfsdf}
{\mathbf{K}}_{X}(*)= \mathcal{K}(\mathbf{Proj}(C^{\infty}(X)))\stackrel{def}{=}{\mathbf{K}}(C^{\infty}(X))\ ,
\end{equation}
where the last equality is our definition of the connective algebraic $K$-theory spectrum of the ring $C^{\infty}(X)$.

We can now give the technical definition of the class $[V,\nabla^{I}]^{alg}\in {\mathbf{K}}(C^{\infty}(X))^{0}(P)$ for a pair $$(V,\nabla^{I})\in {\mathbf{Vect}}^{\flat}(P\times X,T_{\C}P\oplus 0)\ .$$ Indeed, we have
$(V,\nabla^{I})\in j_{X}^{*}{\mathbf{Vect}}^{\flat}(P)$. This object naturally represents a point in $\Omega^{\infty} {\mathbf{K}}_{X}(P)$.
\begin{ddd}\label{xgrergegg} We define \begin{equation}\label{fwefwefwefffefefewfewfewff324234}
[V,\nabla^{I}]^{alg}\in \pi_{0}({\mathbf{K}}_{X}(P))\stackrel{\eqref{fwefewfewfewfe324}}{\cong} {\mathbf{K}}_{X}(*)^{0}(P) \stackrel{\eqref{fefwefwefewf234234sdfsdf}}{\cong} {\mathbf{K}}(C^{\infty}(X))^{0}(P)
\end{equation}
to be the connected component represented by the point $[V,\nabla^{I}]$.\end{ddd}

\subsection{Differential $K$-theory and the regulator map}

We assume that $\bC$ is a stable presentable $\infty$-category like spectra $\Sp$ or chain complexes $\Ch[W^{-1}]$.
We have an adjunction
$$\cH:\Sh_{\bC}(\Mf )\leftrightarrows \Sh^{h}_{\bC}(\Mf ):inclusion\ ,$$
where $\cH$ is called homotopification. By \cite[Prop. 7.6.(2)]{2013arXiv1311.3188B} it is given by a composition
$\cH\simeq L\circ \cH^{pre}$, where $\cH^{pre}:\Sh_{\bC}(\Mf )\to {\mathbf{PSh}}^{h}_{\bC}(\Mf)$ is given by
\begin{equation}\label{fewfwfewfewfewfewfewfewfewfewf}
\cH^{pre} \simeq \colim_{\Delta^{op}} i_{\Delta^{\bullet}}^{*}
\end{equation}
using the notation \eqref{a1}.
Similarly, for the site $\Mf_{\C-fol}$ we have an adjunction \begin{equation}\label{ggg889899898893443}
\cH^{\flat}:\Sh_{\bC}(\Mf_{\C-fol} )\leftrightarrows \Sh^{h}_{\bC}(\Mf_{\C-fol} ):inclusion\ ,
\end{equation} where $\cH^{\flat}=L\circ \cH^{\flat,pre}$ with $\cH^{\flat, pre}$ given again by \eqref{fewfwfewfewfewfewfewfewfewfewf}, but now using \eqref{a2}.
For a manifold $X$ the functor $j_{X}^{*}$ (see \eqref{dwedewdewdwd}) preserves homotopy invariant sheaves. Moreover, if $X$ is compact, then we have \begin{equation}\label{refwefwefewfew45355321343241325465}
j_{X}^{*}\circ \cH^{\flat}\simeq \cH\circ j_{X}^{*}
\end{equation}
(compare \cite[Lemma 2.4 (4)]{Bunke:2014aa} for a proof of a similar statement).

\begin{lem} \label{iewfwefewfewfewf}The sheaves $\mathbf{DD}^{per}$ and $\mathbf{DD}^{-}$ are homotopy invariant. Moreover, for every $p\in \Z$ the inclusions
$$\sigma^{\ge p} \mathbf{DD}^{-}\to \mathbf{DD}^{-}\ , \quad \sigma^{\ge p} \mathbf{DD}^{per}\to \mathbf{DD}^{per}$$ are equivalent to the units of the homotopification.
\end{lem} \proof We start with the case of the map $\sigma^{\ge p} \mathbf{DD}^{per}\to \mathbf{DD}^{per}$ between sheaves on $\Mf$.
Recall the definition \eqref{ewfewfewfewf444efewfewfewfwef}. We let $$\iota:\Ch\to \Ch[W^{-1}]$$ be the canonical localization map.
We have $$\sigma^{\ge p}DD^{per}\cong \prod_{q\in \Z} (\sigma^{\ge p+2q}\Omega)[2q]\ .$$
We discuss the factors separately.
By \cite[Lemma 7.15]{2013arXiv1311.3188B} the map
$$\iota (\sigma^{\ge p+2q}\Omega)[2q] \to \iota( \Omega)[2q]$$ is the unit of the homotopification.
This implies the assertion for $ \mathbf{DD}^{per}$ after applying the Eilenberg-MacLane functor $\mathbf{H}$.

\bigskip

We now discuss $DD^{-}$. We first observe that
$\iota (DD^{-})$ is a homotopy invariant sheaf on the site $\Mf_{\C-fol}$ with values in $\Ch[W^{-1}]$.
We again consider one factor of $$ DD^{-}\cong \prod_{q\in \Z} F^{q}\Omega [2q]$$ at a time. For a foliated manifold $(M,\cF)$ the integration $\int_{I\times M/M}$ preserves the filtration and induces a map
$$\int_{I\times M/M} F^{q}\Omega(I\times M,T_{\C}I\boxplus \cF) \to F^{q}\Omega(M, \cF)[ -1]$$ such that
$$d\int_{I\times M/M} x=x_{|\{1\}\times M}-x_{|\{0\}\times M}\ .$$
This implies that $\iota (F^{q}\Omega )$ is homotopy invariant.

\begin{rem}{\rm The point here is that we define homotopy invariance along the leaf direction.
If we would include transverse directions, then the integral would not preserve the filtration. In this case we only have
$$\int_{I\times M/M} F^{p}\Omega(I\times M, \{0\}\boxplus\cF) \to F^{p-1}\Omega(M, \cF)[ -1]\ .$$
In this case the integration would not be defined on $DD^{-}$.
}
\end{rem}

Once we know that $\iota (F^{q}\Omega) \in \Sh_{\Ch[W^{-1}]}(\Mf_{\C-fol})$ is homotopy invariant, we show that $$\iota( \sigma^{\ge p} F^{q}\Omega) \to \iota (F^{q}\Omega) $$
is the unit of the homotopification exactly as in \cite[Lemma 7.15]{2013arXiv1311.3188B}.
Note that by (the analog of) \cite[Lemma 7.13]{2013arXiv1311.3188B} $\cH^{\flat}(\iota (F^{q}\Omega)^{\ell})=0$ for every $\ell\in \Z$.
This implies as in the proof of \cite[Lemma 7.15]{2013arXiv1311.3188B} that
$\cH^{\flat}(\iota (\sigma^{<p} F^{q}\Omega) )=0$. The claim now follows from an application of $\cH^{\flat}\circ \iota$ to the exact sequence of $\Ch$-valued sheaves
$$0\to\sigma^{\ge p} F^{q}\Omega \to F^{q}\Omega \to \sigma^{<p} F^{q}\Omega \to 0\ . $$

\hB

We define $${\mathbf{K}}^{\nabla}_{X}:=L(j_{X}^{*} \mathcal{K}({\mathbf{Vect}}^{\flat,\nabla}))\ .$$

\begin{lem}\label{kljldqwdqwdwqdd}
The morphisms $$ {\mathbf{K}}_{X}^{\nabla}\to {\mathbf{K}}_{X}\ , \quad \hat {\mathbf{ku}} \to \underline{{\mathbf{ku}}}\ , \quad \hat {\mathbf{ku}}^{\nabla}\to \underline{{\mathbf{ku}}}$$
are equivalent to the units of the homotopification.
\end{lem}
\proof The second and the third cases are consequences of \cite[Lemma 6.3]{2013arXiv1311.3188B}
and \cite[Lemma 6.5]{2013arXiv1311.3188B}. It remains to discuss the first case.
We know that ${\mathbf{K}}_{X}$ is homotopy invariant. The assertion now follows from the analog of
\cite[Lemma 6.4]{2013arXiv1311.3188B} for ${\mathbf{Vect}}^{\flat,\nabla}\to{\mathbf{Vect}}^{\flat}$. \hB

From \eqref{kjwehbfjkfhekfehwfkewfef098901} and the fact that the two objects on the right and the lower left corner are sheaves we get the diagram
\begin{equation}\label{kjwehbfjkfhekfehwfkewfeeedef098901}
\xymatrix{{\mathbf{K}}^{\nabla}_{X} \ar[r]^{j_{X}^{*}r(\ch^{-})}\ar[d]& j_{X}^{*}\sigma^{\ge 0}\mathbf{DD}^{-}\ar[d]\\ j_{X}^{*}\hat {\mathbf{ku}}^{ \nabla}\ar[r]^{j_{X}^{*}r(\ch)}&j_{X}^{*}\sigma^{\ge 0}\mathbf{DD}^{per}}\ .
\end{equation}
We now assume that $X$ is compact. Then by \eqref{refwefwefewfew45355321343241325465}
homotopification commutes with $j_{X}^{*}$.
Applying homotopification to this square and using Lemmas \ref{iewfwefewfewfewf}, \ref{kljldqwdqwdwqdd} we get the square \begin{equation}\label{r223r23r32r32r324}
\xymatrix{{\mathbf{K}}_{X} \ar[r]^{\omega^{-}_{X}}\ar[d]& j_{X}^{*} \mathbf{DD}^{-}\ar[d]\\ j_{X}^{*}\underline{{\mathbf{ku}}} \ar[r]^{\omega_{X}}& j_{X}^{*} \mathbf{DD}^{per}}\ . \end{equation}

We consider the following three versions of Hopkins-Singer type (see \cite{MR2192936} for the original definition and \cite{2013arXiv1311.3188B} for more information) differential algebraic and differential $K$-theories for $p\in \Z$
$$\xymatrix{\hat {\mathbf{K}}_{X }^{p} \ar[d]^{I}\ar[r]&j_{X}^{*}\sigma^{\ge p}\mathbf{DD}^{-}\ar[d]\\ {\mathbf{K}}_{X } \ar[r]^{\omega^{-}_{X}}&j^{*}_{X}\mathbf{DD}^{-}}\ , \quad \xymatrix{\hat {\mathbf{ku}}_{X}^{\flat,p}\ar[d]\ar[r]^{R}&j_{X}^{*}\sigma^{\ge p}\mathbf{DD}^{-}\ar[d]\\ j_{X}^{*}\underline{{\mathbf{ku}}}\ar[r]^{\omega_{X}}&j_{X}^{*}\mathbf{DD}^{per}}\ ,
\quad
\xymatrix{\hat {\mathbf{ku}}^{p}\ar[d]\ar[r]&\sigma^{\ge p}\mathbf{DD}^{per}\ar[d]\\\underline{{\mathbf{ku}}}\ar[r]&\mathbf{DD}^{per}}$$
defined by the respective pull-back square in $\Sh_{\Sp}(\Mf)$. We define the corresponding differential cohomology groups by
$$\hat K_{X}^{p}(P):=\pi_{-p}(\hat {\mathbf{K}}_{X }^{p} (P))\ , \quad \widehat{ku}_{X}^{\flat,p}(P):=\pi_{-p}(\hat {\mathbf{ku}}_{X }^{\flat, p} (P))\ , \quad
\widehat{ku}^{p}(P):=\pi_{-p}(\hat {\mathbf{ku}}^{p}(P))\ .$$

The square \eqref{r223r23r32r32r324} together with the obvious commutative square \begin{equation}\label{hjkdekdhewkdjhewkdewd}
\xymatrix{\sigma^{\ge p}\mathbf{DD}^{-} \ar[r]\ar[d]& \mathbf{DD}^{-}\ar[d]\\ \sigma^{\ge p}\mathbf{DD}^{per} \ar[r]& \mathbf{DD}^{per}}\end{equation}
induces a chain of morphisms
$$\hat {\mathbf{K}}_{X}^{p}\to \hat {\mathbf{ku}}_{X}^{\flat,p}\to j^{*}_{X}\hat {\mathbf{ku}}^{p}\ .$$
Using \eqref{kjwehbfjkfhekfehwfkewfeeedef098901} we finally get the square
$$\xymatrix{ {\mathbf{K}}_{X}^{\nabla} \ar[r]^{cycl}\ar[d]& \hat {\mathbf{K}}_{X}^{0}\ar[d]\\ j_{X}^{*}\hat {\mathbf{ku}}^{\nabla} \ar[r]^{cycl}&j_{X}^{*}\hat {\mathbf{ku}}^{0}}$$ where the horizontal maps are the differential cycle maps.

\bigskip

The following exact sequences are part of the general features of a Hopkins-Singer differential cohomology. The sequence
\begin{equation} \dots\to DD^{-}(P\times X,T_{\C}P\boxplus\{0\})^{\ell-1} /\im(d) \stackrel{a}{\to} \hat K_{X}^{\ell}(P)\stackrel{I}{\to} {\mathbf{K}}_{X}(*)^{\ell}(P)\to 0
\end{equation}
describes the set of possible differential lifts of topological classes. The second sequence
\begin{equation} 0\to \widehat{ku}^{\flat,\ell}_{X,flat}(P)\to \widehat{ku}^{\flat,\ell}_{X}(P)\stackrel{R}{\to} Z^{\ell}(DD^{-}(P\times X,T_{\C}P\boxplus\{0\}))\to\dots
\end{equation}
reflects the definition of the flat subgroup.

\bigskip

We consider the case $P=*$, $\ell:=-p$ and assume that $\dim(X)<p$. In this case it is straightforward to check that $DD^{-}(X,\cF_{min})^{-p-1}=0$ and $DD^{-}(X,\cF_{min})^{-p}=0$.

This implies the isomorphisms $$I:\hat K_{X}^{-p}(*)\stackrel{\cong}{\to} {\mathbf{K}}_{X}(*)^{-p}\ , \quad \widehat{ku}_{X,flat}^{\flat,-p}(*)\stackrel{\cong}{\to} \widehat{ku}_{X}^{\flat,-p}(*)\ .$$
\begin{ddd}\label{klfwefewfewfwf} For $p\in \nat$ such that $\dim(X)<p$ we define the regulator map
${\tt reg}_{X}$ as the composition
$$\hspace{-1cm}K_{p}(C^{\infty}(X))\cong {\mathbf{K}}_{X}(*)^{-p}\stackrel{\cong}{\leftarrow} \hat K_{X}^{-p}(*) \to \widehat{ku}_{X}^{\flat,-p} (*)\stackrel{\cong}{\leftarrow} \widehat{ku}^{\flat,-p}_{X,flat} (*)
\to \widehat{ku}^{-p}_{flat}(X,\cF_{min})\cong {\mathbf{ku}}\C/\Z^{-p-1}(X)\ .$$
\end{ddd}

In Remark \ref{2ddhi3dhio32doi2doo2oidjud2} we will explain how this regulator can be obtained by specializing a more basic regulator.

\begin{rem}{\rm In \cite[Thm 1.1]{Bunke:2014aa} we defined a similar regulator map
$$\sigma_{p}:K_{p}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-p-1}(X)$$
using different methods. While here, in order to define the Chern character, we use characteristic forms associated to connections, in \cite{Bunke:2014aa} we use the Goodwillie-Jones Chern character.
The two Chern characters are equivalent as primary invariants \cite[Lemma 2.27]{Bunke:2014aa}. In order to compare the two regulator maps $\sigma_{p}$ and ${\tt reg}_{X}$ we would need to compare the two Chern characters on the space level. So at the moment it remains an open question whether $\sigma_{p}={\tt reg}_{X}$.
}\end{rem}

\subsection{Integration and proof of Theorem \ref{flkfefwefwefewfef}}\label{fwklfwfewfewfewf}

We now assume that $P$ is closed and has a stable framing $s$. Then $f:P\to *$ has a natural differential orientation $\hat o_{s}$ (see \cite[Example 4.230]{2012arXiv1208.3961B}) and we have an associated Umkehr map in every Hopkins-Singer differential cohomology theory.
We further assume that $X$ is closed, spin and equipped with a Riemannian metric. This induces a differential ${\mathbf{ku}}$-orientation $\hat o$ of the projection $\pi:X\to *$, see Subsection \ref{wlekfjwelfjewlfwef123}.

Let $p:=\dim(P)$ and $d:=\dim(X)$. We have the commutative diagram
\begin{equation}\label{dewewdedewdewd2342343}
\xymatrix{ &&\widehat{KU}^{0}(P\times X)\ar[r]^{(\pi\circ f)^{\hat o\circ \hat o_{s}}_{!}}&\widehat{KU}^{-p-d}(*)\ar[r]^{\cong}&\C/\Z\ar@{=}[d]\\
\pi_{0}({\mathbf{K}}_{X}^{\nabla})\ar@/^3cm/[rrrru]^{(V,\nabla^{V})\mapsto \rho(M,\cF,\nabla^{I},s)} \ar[urr]^{(V,\nabla)\mapsto [V,\nabla]}\ar[d]_{(V,\nabla^{V})\mapsto [V,\nabla^{I}]^{alg}}\ar[r]^{cycl} &\hat K_{X}^{0}(P)\ar[r]\ar[dl]\ar[d]^{f_{!}^{\hat o_{s}}}\ar@{}[dr]^{\textcircled{2}}&\widehat{ku}^{0}(P\times X) \ar@{}[ur]^{\textcircled{2}}\ar[d]^{\hat f^{\hat o_{s}\times X}_{!}}\ar@{}[dr]^{\textcircled{1}}\ar[u]\ar[r]^{(\pi\circ f)^{\hat o\circ \hat o_{s}}_{!}}&\widehat{ku}^{-p-d}(*) \ar[u]\ar@{=}[d]\ar[r]^{\cong}&\C/\Z\ar@{=}[d]\\
{\mathbf{K}}_{X}(*)^{0}(P)\ar@{}[r]^{\textcircled{3}} \ar[d]^{f_{!}^{o_{s}}}&\ar[dl]^{\cong}\hat K^{-p}_{X}( *) \ar@{}[dr]^{\textcircled{4}}\ar[r] &\widehat{ku}^{-p }(X) \ar[r]^(0.5){\pi_{!}^{\hat o}} &\widehat{ku}^{-d-p}(*) \ar[r]^{\cong}&\C/\Z
\\ {\mathbf{K}}_{X}(*)^{-p} \ar[rr]^{{\tt reg}_{X}}
& &{\mathbf{ku}}\C/\Z^{-p-1}(X)\ar[r]^{\pi_{!}^{o}} \ar[u]\ar@{}[ur]^{\textcircled{5}}&{\mathbf{ku}}\C/\Z^{-d-p-1}(*)\ar[u]_{\cong } & }
\end{equation}
The square $\textcircled{1}$ commutes by the ${\mathbf{ku}}$-analog of \eqref{e23e23e3e23e32e32e2}.
For the squares $\textcircled{2}$ we use that integration commutes with transformations between Hopkins-Singer differential cohomology theories provided the orientations are related correspondingly.
For the square $\textcircled{3}$ we use the right-most square of the ${\mathbf{ku}}$-analog of \eqref{r23r23r23r23r23r235435346546}.
The square $\textcircled{4}$ commutes by the definition of the regulator. For $\textcircled{5}$
we use that the identification of the flat subgroup in a Hopkins-Singer differential cohomology with the $\C/\Z$-version of the underlying cohomology theory is compatible with integration, i.e. the left-most square in the ${\mathbf{ku}}$-analog of \eqref{r23r23r23r23r23r235435346546}.

\bigskip

The upper composition in \eqref{dewewdedewdewd2342343} maps $(V,\nabla^{V})$, essentially by definition, to $\rho(M,\cF,\nabla^{I},s)$ as indicated. The down-right composition sends $(V,\nabla^{V})$ to
$$\pi_{!}^{o}({\tt reg}_{X}(f_{!}^{o_{s}}([V,\nabla^{I}]^{alg})))\ .$$
Thus Theorem \ref{flkfefwefwefewfef} follows from the commutativity of \eqref{dewewdedewdewd2342343}. \hB

\section{Algebraic $K$-theory of foliations}\label{dkqwldqwdwqdwqdwqdwqd}

In this section we define the algebraic $K$-theory sheaf ${\mathbf{K}}$ on $\Mf_{\C-fol}$. Its homotopy groups $$K^{*}(M,\cF):=\pi_{-*}({\mathbf{K}}(M,\cF)) $$ can be considered as the algebraic $K$-theory groups of the foliation $(M,\cF)$. We further introduce the Hodge-filtered connective $K$-theory sheaf $ {\mathbf{ku}}^{\flat}$ and define a regulator
$${\tt reg}:{\mathbf{K}}\to {\mathbf{ku}}^{\flat}\ .$$ For $p>\mathrm{codim}(\cF)$ it induces a map
$$\tilde {\tt reg}:K^{-p}(M,\cF)\to {\mathbf{ku}}\C/\Z^{-p-1}(M)$$
which generalizes the regulator introduced in Definition \ref{klfwefewfewfwf}.

\bigskip

\begin{rem}{\rm This section has a considerable overlap with the work of Karoubi \cite{karoubi43}, \cite{karoubi45}.
We add this section to the present paper since it fits well with the set-up developed here and puts the regulator in its natural framework.
We will study this regulator and examples elsewhere.
}
\end{rem}

We will use the notation introduced in Subsection \ref{klfwjlefewfewf}. In particular ${\mathbf{Vect}}^{\flat}$ and ${\mathbf{Vect}}^{\flat,\nabla}$ denote the symmetric monoidal stacks of pairs $(V,\nabla^{I})$ and $(V,\nabla)$ of complex vector bundles and flat partial connections, or complex vector bundles and connections whose restriction to the foliation is flat, respectively.
The symbols $L$ and $\cH^{\flat}$ denote the sheafification and the homotopification operations.

\begin{ddd} We define sheaves of spectra
$$ {\mathbf{K}} := \cH^{\flat}(L(\mathcal{K}({\mathbf{Vect}}^{\flat}))) \in \Sh^{h}_{\Sp}(\Mf_{\C-fol})\ , \quad {\mathbf{K}}^{\nabla} := L(\mathcal{K}({\mathbf{Vect}}^{\flat,\nabla})) \in \Sh_{\Sp}(\Mf_{\C-fol})\ .$$ For $p\in \Z$ we define the algebraic $K$-theory of a foliated manifold $(M,\cF)$ by
$$ K^{p}(M,\cF):=\pi_{-p}( {\mathbf{K}} (M,\cF))\ .$$
\end{ddd}
\begin{rem}\rm {Note that $\mathcal{K}({\mathbf{Vect}}^{\flat})$ is homotopy invariant.
We expect that the sheafification preserves homotopy invariance so that the homotopification is not really necessary in this definition.
}\end{rem}

In order to motivate this definition let us discuss special cases.

\begin{ex}{\rm Recall the functor $j:=j_{*}:\Mf\to \Mf_{\C-fol}$ given by $$j(M):=(M,\cF_{max})\ ,$$ see \eqref{dwedewdewdwd}. Let ${\mathbf{K}}(\C)$ denote the connective algebraic $K$-theory spectrum of the field $\C$.
\begin{lem} \label{fwekflewfwfewfw} We have an equivalence $j^{*}{\mathbf{K}}\simeq \underline{{\mathbf{K}}(\C)}$. \end{lem} \proof Since
$j (I\times M)\cong (I, T_{\C}I)\times j(M)$ we conclude that $j^{*}$ preserves homotopy invariant sheaves. Since ${\mathbf{K}}$ is homotopy invariant the sheaf
$j^{*}{\mathbf{K}}$ is homotopy invariant. Therefore (see \cite[Prop. 2.6, 1.]{2013arXiv1311.3188B}) we have an equivalence
$$j^{*}{\mathbf{K}}\simeq \underline{(j^{*}{\mathbf{K}})(*)}\ .$$
If $\bE$ is a presheaf of spectra on $\Mf_{\C-fol}$ and $L$ is the sheafification \eqref{dqdqwdwqdwqdwqdwqdqd}, then we have a natural equivalence of spectra $L(\bE)(*)\simeq \bE(*)$.
Consequently,
$$(j^{*}{\mathbf{K}})(*)\simeq \mathcal{K}({\mathbf{Vect}}^{\flat}(*,\cF_{max}))\ .$$ The category ${\mathbf{Vect}}^{\flat}(*,\cF_{max}) $ is the category of finite-dimensional complex vector spaces. Consequently we have an equivalence of spectra $$\mathcal{K}({\mathbf{Vect}}^{\flat}(*,\cF_{max}))\simeq {\mathbf{K}}(\C)\ .$$
The combination of these equivalences gives the assertion of the Lemma.
\hB

\bigskip

As a consequence of Lemma \ref{fwekflewfwfewfw} we have for a manifold $M$ \begin{equation}\label{cejkwdjejdhewdhewkdei832e32e32e32e2e}
K^{*}(M,\cF_{max})\cong {\mathbf{K}}(\C)^{*}(M)\ .
\end{equation}

}
\end{ex}

\begin{ex}\label{kjffjewfewjkfhewkjfewfkewfhewf87z}{\rm We have a natural functor
$\kappa:\Mf\to \Mf_{\C-fol}$ which is given by $M\mapsto (M,\cF_{min})$.
On the site $\Mf$ we have the differential cohomology theory $\hat {\mathbf{ku}}$, see \eqref{wqdwqddq21321} and \cite{2013arXiv1311.3188B}.
We have an equivalence of sheaves of spectra on $\Mf$ $$\kappa^{*}{\mathbf{K}}\simeq \hat {\mathbf{ku}}\ .$$
Consequently, $$K^{*}(M,\cF_{min})\cong \widehat{ku}^{*}(M)\ .$$

}
\end{ex}

\begin{ex}{\rm For a fixed manifold $P$ there is a natural map \begin{equation}\label{dkejwdlewdewded}
{\mathbf{K}}_{X}(P)\to {\mathbf{K}}(P\times X,T_{\C}P\boxplus\{0\})\end{equation}
which is natural in $X$. It is essentially the sheafification morphism in the direction of $X$.
The spectrum valued functor $$P\mapsto {\mathbf{K}}(P\times X, T_{\C}P\boxplus 0)$$ is a homotopy invariant sheaf on $\Mf$.
Since
$${\mathbf{K}}(\{*\}\times X, T_{\C}\{*\}\boxplus 0)\simeq \hat {\mathbf{ku}}(X)\ ,$$ by \cite[Prop. 2.6, 1.]{2013arXiv1311.3188B} it is therefore equivalent to $\underline{\hat {\mathbf{ku}}(X)}$. We thus get a map
$$ K_{X}^{*}(P)\to K^{*}(P\times X, T_{\C}P\boxplus 0)\cong \hat{{\mathbf{ku}}}(X)^{*}(P)\ .$$
}
\end{ex}

\begin{ex}\label{kdjqwkldjwqldjwqldwqd}{\rm Assume that $X$ is a smooth complex algebraic variety and let $X^{an}$ be its associated complex manifold with foliation $\cF:=T^{0,1}X^{an}$. Then we can consider the algebraic $K$-theory spectrum ${\mathbf{K}}^{alg}(X)$ of $X$. It is defined like ${\mathbf{K}}(M,\cF)$ as the sheafification of the presheaf $X\supset U\mapsto \mathcal{K}({\mathbf{Vect}}^{alg}(U))$, where ${\mathbf{Vect}}^{alg}(U)$ is the symmetric monoidal category of algebraic vector bundles on the Zariski open subset $U$. Since the analytic topology of $X^{an}$ refines the Zariski topology of $X$ the transformations ${\mathbf{Vect}}^{alg}(U)\to {\mathbf{Vect}}(U^{an})$ induce a map
$${\mathbf{K}}^{alg}(X)\to {\mathbf{K}}(X^{an},T^{0,1}X)\ .$$
This example justifies calling ${\mathbf{K}}(M,\cF)$ the algebraic $K$-theory spectrum of the foliated manifold $(M,\cF)$.
}
\end{ex}

\begin{ex}{\rm If $(V,\nabla^{I})$ is a complex vector bundle with flat partial connection on a foliated manifold $(M,\cF)$, then we get a class
$$[V,\nabla^{I}]^{alg}\in K^{0}(M,\cF)\ .$$
Similarly, if $\nabla$ is a connection which extends $\nabla^{I}$, then we get a class
$$[V,\nabla]^{alg}\in \pi_{0}({\mathbf{K}}^{\nabla}(M,\cF))\ .$$}
\end{ex}

\bigskip

From \eqref{kjwehbfjkfhekfehwfkewfef098901} and the fact that the objects on the right and the lower left corner are sheaves we get the diagram
\begin{equation}\label{kjwehbfjkfhekfehwfkewfeeedef098901rrr}
\xymatrix{{\mathbf{K}}^{\nabla} \ar[r]^{ r(\ch^{-})}\ar[d]& \sigma^{\ge 0}\mathbf{DD}^{-}\ar[d]\\ \hat {\mathbf{ku}}^{ \nabla}\ar[r]^{ r(\ch)}& \sigma^{\ge 0}\mathbf{DD}^{per}}\ .
\end{equation}
Applying homotopification to this square and using the Lemmas \ref{iewfwefewfewfewf} and \ref{kljldqwdqwdwqdd}
we get the square \begin{equation}\label{r223r23r32r32r324rrr}
\xymatrix{{\mathbf{K}} \ar[r]^{\omega^{-}}\ar[d]& \mathbf{DD}^{-}\ar[d]\\ \underline{{\mathbf{ku}}} \ar[r]^{\omega}& \mathbf{DD}^{per}}\ . \end{equation}

\begin{ddd}
We define the Hodge-filtered connective complex ${\mathbf{ku}}$-theory sheaf ${\mathbf{ku}}^{\flat}$ on $\Mf_{\C-fol}$ by the pull-back square \begin{equation}\label{ffewfewfwefewfewf234}
\xymatrix{ {\mathbf{ku}}^{\flat}\ar[d]\ar[r]&\mathbf{DD}^{-}\ar[d]\\
\underline{{\mathbf{ku}}}\ar[r]^{\omega}&\mathbf{DD}^{per}}\ .
\end{equation}
We let $$ ku^{\flat,p}(M,\cF):=\pi_{-p}( {\mathbf{ku}}^{\flat}(M,\cF))$$ be the corresponding Hodge-filtered ${\mathbf{ku}}$-theory groups of $(M,\cF)$.
\end{ddd}

\begin{rem}{\rm In \cite{karoubi43}, \cite{karoubi45} Karoubi introduced, starting from a filtration of the de Rham complex, the multiplicative K-theory $\mathbf{MK}$. Applied to the filtration (Definition \ref{ilfjewlfwfewfewfewfwfw}) coming from a foliation the multiplicative K-theory groups $\mathbf{MK}^{*}(M,\cF)$ are the Hodge-filtered $\mathbf{KU}$-theory groups of $(M,\cF)$. In other words, $ku^{\flat,*}$ is the connective $K$-theory analog of Karoubi's multiplicative $K$-theory. If one applies the functor $\Omega^{\infty}$ to \eqref{ffewfewfwefewfewf234}, then one obtains a pull-back square of sheaves of spaces which is the analog of the square just before the statement of Theorem 7.3 in \cite{karoubi45}. The fact that ${\mathbf{ku}}^{\flat}$ is a sheaf of spectra implies a Mayer-Vietoris type sequence for an open decomposition of a foliated manifold. This is Karoubi's theorem \cite[Thm. 7.7]{karoubi45}.

For a justification to use the term {\em Hodge-filtered...} instead of {\em multiplicative...}
see Remark \ref{jhdjkhkjqwdhqwkjdhwqkjdqwdwqd}.

}
\end{rem}

\begin{rem}\label{jhdjkhkjqwdhqwkjdhwqkjdqwdwqd}{\rm The Hodge-filtered connective complex ${\mathbf{ku}}$-theory ${\mathbf{ku}}^{\flat}$ is the ${\mathbf{ku}}$-theory analog of the integral Deligne cohomology which would be the Hodge filtered version of $H\Z$. While integral Deligne cohomology is the natural target for cycle maps from Chow groups of algebraic cycles, ${\mathbf{ku}}^{\flat}$ is the natural target of the regulator from algebraic $K$-theory. In \cite{MR3335251} the authors defined for every spectrum over $\mathbf{H} \Z$ a Hodge-filtered version. In an analogous manner, replacing integral Deligne cohomology by ${\mathbf{ku}}^{\flat}$ one could construct Hodge filtered cohomology theories for spectra over ${\mathbf{ku}}$. Observe that
${\mathbf{ku}}^{\flat}$ is the Hodge-filtered version associated to the identity ${\mathbf{ku}}\to {\mathbf{ku}}$. This fact motivates the name.
}
\end{rem}
In view of Lemma \ref{iewfwefewfewfewf} the sheaf
$ {\mathbf{ku}}^{\flat}$ is homotopy invariant (compare \cite[Thm 4.8]{karoubi43}). This fact is reflected in our notation by not using a $\hat{(\dots)}$-decoration.

\bigskip

\begin{ddd}\label{ojlfewfefwefw234234}
We define the regulator ${\tt reg}:{\mathbf{K}}\to {\mathbf{ku}}^{\flat}$ to be the morphism induced by the square \eqref{r223r23r32r32r324rrr} and the universal property of the pull-back square \eqref{ffewfewfwefewfewf234}.
\end{ddd}

\begin{rem}
{\rm Such a regulator has first been defined in \cite[Sec.4]{karoubi45}. Karoubi's regulator provides a factorization
$${\mathbf{K}}\to \mathbf{MK}\to \mathbf{KU}$$
of the map from algebraic to topological $K$-theory.
Our analog is
$${\mathbf{K}}\to {\mathbf{ku}}^{\flat}\to {\mathbf{ku}}\ .$$

}
\end{rem}

\bigskip

\begin{rem}{\rm The map
$${\tt reg}:{\mathbf{K}}\to {\mathbf{ku}}^{\flat}$$ could be considered as a foliated and integral analog of Beilinson's regulator.
In order to see this we show that the classical Beilinson regulator can be factored over the regulator ${\tt reg}$ defined above.

We first interpret real Deligne cohomology as Hodge-filtered ${\mathbf{ku}}\R$-theory. Here as usual, we write ${\mathbf{ku}}\R:={\mathbf{ku}}\wedge M\R$ for the product of ${\mathbf{ku}}$ with the Moore spectrum of $\R$.
The Chern character induces an equivalence of spectra $${\mathbf{ku}}\R\simeq \prod_{p\ge 0}\mathbf{H}\R[2p]\ .$$
The de Rham equivalence $\underline{\mathbf{H}\R}\simeq \mathbf{H}\Omega_{\R}$ provides the second equivalence in the composition
$$\underline{{\mathbf{ku}}\R} \simeq \underline{\prod_{p\ge 0}\mathbf{H}\R[2p]}\simeq \prod_{p\ge 0} \mathbf{H}\Omega_{\R}[2p]\to \mathbf{DD}^{per}\ ,$$
where the last map is the natural inclusion. This composition provides the lower horizontal map in the pull-back square in $\Sh_{\Sp}(\Mf_{\C-fol})$ \begin{equation}\label{wefwefewfewfewfewfw}
\xymatrix{\mathbf{H}_{\R,Del}\ar[d]\ar[r]&\mathbf{DD}^{-}\ar[d]\\
\underline{{\mathbf{ku}}\R}\ar[r]&\mathbf{DD}^{per}}
\end{equation}
which defines $$\mathbf{H}_{\R,Del}\in \Sh^{h}_{\Sp}(\Mf_{\C-fol})\ .$$ On the one hand, this is the Hodge-filtered version of ${\mathbf{ku}}\R$-theory. On the other hand it is a generalization of real Deligne cohomology to foliated manifolds. In fact, for a smooth complex algebraic variety $X$ we have a natural isomorphism \begin{equation}\label{frff877823r87786r8723r32r32r}
\pi_{*}(\mathbf{H}_{\R,Del} (X^{an},T^{0,1}X))\cong \prod_{p\in \nat} H_{Del,an}^{2p-*}(X^{an},\R(p))\ .
\end{equation}
Note that $\mathbf{H}_{\R,Del} $ does not involve the weight-filtration and therefore reflects the ``wrong'' Hodge filtration on $H^{*}(X^{an};\C)$ for non-proper $X$.

\bigskip

The natural map ${\mathbf{ku}}\to {\mathbf{ku}}\R$ induces a morphism of pull-back squares $\eqref{ffewfewfwefewfewf234}\to \eqref{wefwefewfewfewfewfw}$ and therefore a morphism,
$$\ch_{\R,Del}: {\mathbf{ku}}^{\flat}\to\mathbf{H}_{\R,Del}\ .$$
The composition
$${\mathbf{K}}\stackrel{{\tt reg}}{\to} {\mathbf{ku}}^{\flat}\stackrel{\ch_{\R,Del}}{\to} \mathbf{H}_{\R,Del}$$ yields indeed Beilinson's regulator if one applies this to the foliated manifolds $(X^{an},T^{0,1}X)$, precomposes with
${\mathbf{K}}^{alg}(X)\to {\mathbf{K}}(X^{an},T^{0,1}X)$, see Example \ref{kdjqwkldjwqldjwqldwqd}, and uses the identification \eqref{frff877823r87786r8723r32r32r}.
This easily follows from the description of Beilinson's regulator given in \cite{Bunke:2012fk}, \cite{Bunke:2013aa}.
}
\end{rem}

Let $(M,\cF)$ be a foliated manifold.

\begin{lem}
If $\mathrm{codim}(\cF)<p$, then we have a natural isomorphism
$${\mathbf{ku}}\C/\Z^{-p-1}(M)\cong ku^{\flat,-p}(M,\cF)\ .$$
\end{lem} \proof This easily follows from
$$\pi_{p}(\mathbf{DD}^{-}(M,\cF))\cong 0\cong \pi_{p+1}(\mathbf{DD}^{-}(M,\cF))\ .$$ \hB

\begin{kor}
If $\mathrm{codim}(\cF)<p$, then the regulator (Definition \ref{ojlfewfefwefw234234}) induces a map
$$\tilde {\tt reg}:K^{-p}(M,\cF)\to {\mathbf{ku}}\C/\Z^{-p-1}(M)\ .$$
\end{kor}

\begin{rem}\label{2ddhi3dhio32doi2doo2oidjud2}{\rm We have a factorization of ${\tt reg}_{X}$ defined in Definition \ref{klfwefewfewfwf}
as
$$\xymatrix{K_{X}^{-p}(*)\ar[r]^{{\tt reg}_{X}}\ar[d]^{\eqref{dkejwdlewdewded}}&{\mathbf{ku}}\C/\Z^{-p-1}(X)\\
K^{-p}(X,\cF_{max})\ar[ur]^{\tilde {\tt reg}}\ar[r]^{\eqref{cejkwdjejdhewdhewkdei832e32e32e32e2e}}_{\cong}&{\mathbf{K}}(\C)^{-p}(X)\ar[u]^{\sigma}}$$
for every $p\ge 1$. Here $\sigma:{\mathbf{K}}(\C)\to {\mathbf{ku}}\C/\Z$ is the morphism discussed e.g. in \cite[7.21]{karoubiast}, \cite[Ex. 6.9]{2013arXiv1311.3188B}.}
\end{rem}

\begin{rem}{\rm In \cite{Bunke:2014aa} we asked whether the map \begin{equation}\label{hdgqhjdgqwhdghjqwgdwhjgdj7861}
K_{p}(C^{\infty}(X))\to K^{top}_{p}(C^{\infty}(X))\end{equation} can be non-trivial for $p>\dim(X)$.
This question has an analog in the foliated case.

Note that
$$\kappa^{*}{\mathbf{K}}\to \underline{{\mathbf{ku}}}$$ (see Example \ref{kjffjewfewjkfhewkjfewfkewfhewf87z} for $\kappa$) is the homotopification morphism.
The question is now: \begin{prob}\label{lkdeqwdqwdqwd} Let $(M,\cF)$ be a foliated manifold and $p\in \nat$ be such that $\mathrm{codim}(\cF)<p$. Is the map
$$K^{-p}(M,\cF)\to {\mathbf{ku}}^{-p}(M)$$
trivial?
\end{prob}

In the special case of a minimal foliation we ask whether $$K^{-p}(X,\cF_{min})\to {\mathbf{ku}}^{-p}(X)$$
can be non-trivial for $p>\dim(X)$.
The difference to \eqref{hdgqhjdgqwhdghjqwgdwhjgdj7861} can best be explained by the commutative diagram
$$ \xymatrix{K_{p}(C^{\infty}(X))\ar[d]\ar[r]&K_{p}^{top}(C^{\infty}(X))\ar[d] \\K^{-p}(X,\cF_{min})\ar[r]&{\mathbf{ku}}^{-p}(X)}\ ,$$
where the vertical maps are induced by sheafification in the $X$-direction.

\bigskip

In the foliated case we can answer Question \ref{lkdeqwdqwdqwd} affirmatively, at least rationally.
\begin{prop}
Let $(M,\cF)$ be a foliated manifold, $p\in \nat$ such that $\mathrm{codim}(\cF)<p$ and
$x\in K^{-p}(M,\cF)$.
Then the image
$x_{\Q}\in {\mathbf{ku}}\Q^{-p}(M)$ vanishes.\end{prop}\proof Note that the natural map ${\mathbf{ku}}\Q^{*}(M)\to {\mathbf{ku}}\C^{*}(M)$ is injective and that the Bockstein sequence
$${\mathbf{ku}}\C/\Z^{-p-1}(M)\stackrel{\beta}{\to} {\mathbf{ku}}^{-p}(M)\stackrel{c}{\to} {\mathbf{ku}}\C^{-p}(M)$$
is exact. We write $x_{\C}$ for the image of $x_{\Q}$ in ${\mathbf{ku}}\C^{-p}(M)$.
We have
$x_{\C}=c(\beta(\tilde{\tt reg}(x)))=0$. \hB

}\end{rem}
\section{Introduction}

In numerous cases of applied mathematics and mathematical physics the solutions to problems can only be represented as series derived by means of some kind of perturbation theory or iterative procedure. A great majority of such series is even divergent, having meaning only as asymptotic series for an infinitesimally small expansion variable. At the same time, the considered problems often require finite values of this variable, sometimes even very large values. The standard way of treating such asymptotic series, for the purpose of their extrapolation to the finite values of the variable, is by invoking the Pad\'{e} approximants
\cite{Baker1}. The latter, however, exhibit several deficiencies limiting their applicability, as is discussed in Refs. \cite{Baker1,Gluzman2}, for instance, such a notorious deficiency as the appearance of spurious poles.
Another weak point is the ambiguity of choosing one of the Pad\'{e}
approximants $P_{M/N}$ from the table of many admissible, for each series of order $k$, variants satisfying the condition $M + N + 1 = k$. Also, in the limit of a large variable $x$ the approximant $P_{M/N}(x)$ behaves as
$x^{M - N}$. Hence, only integer powers of $x$ are allowed. It is possible to improve the results by employing the modified Pad\'{e} approximants
\cite{Baker_3}, corresponding to the power $P_{M/N}^\gamma$, with choosing the appropriate value of $\gamma$ satisfying the large-variable limit.

In the present paper, we show that it is possible to formulate a general method for effectively extrapolating and interpolating asymptotic series.
The method enjoys the following advantages: (i) It is unambiguously defined for each given series of order $k$; (ii) It allows for the treatment of large-variable behavior of any type, whether with integer, rational, or irrational powers; (iii) Being more general, it is not less accurate than the method of the Pad\'{e} approximants, when the latter exist, in many cases, being more accurate.

In the great majority of realistic situations, only a few terms of asymptotic expansions are available. Therefore, in the examples below, we do not consider very large series, showing that even several terms allow us to derive quite accurate approximations.

\section{Self-similar root approximants}

Suppose we are interested in finding a real function $f(x)$ of a real variable $x$. However, this function is defined by a complicated equation that cannot be solved exactly. But, applying a kind of perturbation theory,
we can derive the small-variable behavior of this function
\begin{equation}
\label{1}
f(x) \simeq f_k(x) \qquad ( x \rightarrow 0 ) \; ,
\end{equation} represented by asymptotic series, with the $k$-th order expansion
\begin{equation}
\label{2}
f_k(x) = f_0(x) \left ( 1 + \sum_{n=1}^k a_n x^n \right ) \; ,
\end{equation} where
\begin{equation}
\label{3}
f_0(x) = A x^\alpha \; .
\end{equation}

Sometimes, the large-variable behavior of the function
\begin{equation}
\label{4}
f(x) \simeq f^{(p)}(x) \qquad ( x \rightarrow \infty )
\end{equation} is also known and can be represented by an expansion over $1/x$ as
\begin{equation}
\label{5}
f^{(p)}(x) = f_\infty(x) \left ( 1 + \sum_{n=1}^p \; \frac{b_n}{x^n}
\right ) \; ,
\end{equation} with
\begin{equation}
\label{6}
f_\infty(x) = B x^\beta \; .
\end{equation}

For what follows, it is convenient to deal with the ratio $f(x)/f_0(x)$,
which at small variable $x \rightarrow 0$ behaves as
\begin{equation}
\label{7}
\frac{f(x)}{f_0(x)} \simeq \frac{f_k(x)}{f_0(x)} = 1 +
\sum_{n=1}^k a_n x^n \; ,
\end{equation} and at large values of the variable $x \rightarrow \infty$ it tends to
\begin{equation}
\label{8}
\frac{f(x)}{f_0(x)} \simeq \frac{f_\infty(x)}{f_0(x)} =
\frac{B}{A} \; x^{\beta-\alpha} \; .
\end{equation}

The extrapolation of the small-variable expansions to the large-variable region can be done by means of self-similar approximation theory
\cite{Yukalov_4,Yukalov_5,Yukalov_6,Yukalov_7,Yukalov_8}. In this approach,
the transfer from a $k$-th order approximation, say, a small-variable expansion, to the higher orders of approximations is treated as the motion with respect to the approximation order $k$ playing the role of discrete time. Constructing a dynamical system, whose trajectory is bijective to the sequence of approximations, makes it feasible to find a fixed point representing the sought function. The convergence to the fixed point is governed by control functions. The self-similar approximation theory combines the methods of optimal control theory, dynamical theory,
and renormalization-group approach. We shall not go into the details and mathematical justification of the self-similar approximation theory that has been thoroughly expounded in Refs.
\cite{Yukalov_4,Yukalov_5,Yukalov_6,Yukalov_7,Yukalov_8}, but we shall use some of its consequences.

Employing this theory for the purpose of interpolation between the small-variable and large-variable regions, it is possible to come
\cite{Yukalov_9,Gluzman_10,Yukalov_11} to the self-similar root approximant
\begin{equation}
\label{9}
\frac{f^*_k(x)}{f_0(x)} = \left ( \left ( \ldots ( 1 + A_1 x )^{n_1}
+ A_2 x^2 \right )^{n_2} + \ldots + A_kx^k \right )^{n_k} \; .
\end{equation} A theorem has been proved \cite{Yukalov_12} stating that all parameters
$A_i$ and powers $n_i$ of approximant (9) are uniquely defined through the large-variable form (5).

However, the root approximant (9) cannot be uniquely defined through the small-variable expansion (2). This hinders the applicability of the approximant (9), since in the majority of cases, the small-variable expansion is better known, providing a number of terms, while the knowledge of the large-variable behavior is limited by just a single term (6), often even without precise data for the amplitude $B$. In order to extend the applicability of approximant (9) to be uniquely defined through the small-variable expansion, it is necessary to impose some constraints on the powers $n_j$. Such a straightforward constraint is the requirement that all parameters $A_j$ of approximant (9) be involved in the definition of the large-variable limit, which implies the relation
\begin{equation}
\label{10}
n_j = \frac{j+1}{j} \qquad ( j = 1,2,\ldots, k-1) \; ,
\end{equation} with $n_k = (\beta - \alpha)/k$, so that the approximant reproduces the large-variable power $x^{\beta-\alpha}$. By expanding Eq. (9) in powers of $x$, it is easy to prove that all parameters $A_j$ are uniquely defined through the coefficients $a_j$ of small-variable expansion (2). In addition, we can require the validity of the limiting form (6), which improves accuracy.

The self-similar root approximant (9), with conditions (6) and (10),
whose parameters $A_j$ are uniquely defined by the accuracy-through-order procedure and are expressed through the coefficients $a_j$ of the small-variable expansion (2), can be called, for short, the {\it root approximant}. In the following sections, we demonstrate that this root approximant provides quite accurate approximations for different problems,
uniformly extrapolating the small-variable expansion (2), valid for
$x \rightarrow 0$, to the whole region of $x \in [0,\infty]$.

\section{Illustration by simple examples}

Before going to more complicated problems, we show the efficiency of the method by simple cases.

\subsection{Hard-core scattering problem}

Let us start the illustration of the method from the problem considered by Baker and Gammel \cite{Baker_3}. When calculating the scattering length of a repulsive square-well potential, one meets the integral
$$
S(x) = \int_0^x \left ( \frac{\sin t}{t^3} \; - \; \frac{\cos t}{t^2}
\right )^2 \; dt \; ,
$$
whose limit, as $x \rightarrow \infty$, equals $\pi/15$. Baker and Gammel state that this integral cannot be correctly evaluated by the standard Pad\'{e} method.
To solve the problem, they suggest a modified method employing a power of the Pad\'{e} approximant. We show below that such integrals can easily be treated by means of the root approximants.

The small-variable expansion of this integral reads as
$$
S(x) \simeq \frac{x}{9} \; - \; \frac{x^3}{135} + \frac{x^5}{2625} \; - \;
\frac{4x^7}{297675} + \frac{2x^9}{5893965} \; - \; \frac{x^{11}}{166080925} +
\frac{x^{13}}{10672286625} \; .
$$
Comparing this with form (2), we have $S_0(x) = x/9$. Since expansion (2)
is in powers of $x^2$, we construct the root approximants (9) using $x^2$ as a variable. Thus, the root approximant of third order is
$$
S_3^*(x) = \frac{x}{9} \left ( \left ( \left ( 1 + A_1 x^2 \right )^2 +
A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{-1/6}\; ,
$$
where the parameters are
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.016907 \; .
$$
To fourth order,
$$
S_4^*(x) = \frac{x}{9} \left ( \left ( \left ( \left ( 1 + A_1 x^2
\right )^2 + A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{4/3} +
A_4 x^8 \right )^{-1/8}\; ,
$$
where
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.002757 \; ,
\qquad A_4 = 0.004636 \; .
$$
To fifth order,
$$
S_5^*(x) = \frac{x}{9} \left ( \left ( \left ( \left ( \left ( 1 + A_1 x^2
\right )^2 + A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{4/3} +
A_4 x^8 \right )^{5/4} + A_5 x^{10} \right )^{-1/10} \; ,
$$
where
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.002757 \; ,
$$
$$
A_4 = 0.000578 \; , \qquad A_5 = 0.001285 \; .
$$
And to sixth order,
$$
S_6^*(x) = \frac{x}{9} \left ( \left ( \left ( \left ( \left ( \left (
1 + A_1 x^2 \right )^2 + A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{4/3} +
A_4 x^8 \right )^{5/4} + \right. \right.
$$
$$
\left. \left. +
A_5 x^{10} \right )^{6/5} + A_6 x^{12}
\right )^{-1/12} \; ,
$$
where
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.002757 \; ,
$$
$$
A_4 = 0.000578 \; , \qquad A_5 = 0.000137 \; , \qquad A_6 = 0.000356 \; .
$$
All these approximants converge to $\pi/15$, as $x \rightarrow \infty$. The higher the approximant order, the faster the convergence.

\subsection{Debye function}

The $n$-th order Debye function is defined \cite{Abramowitz_13} through the integral representation
$$
D(n,x) \equiv \frac{n}{x^n} \int_0^x \frac{t^n}{e^t -1 } \; dt \; .
$$
For $|x| < 2 \pi$ and $n \geq 1$, it possesses the expansion
$$
D(n,x) \simeq 1 \; - \; \frac{n}{2(n+1)} \; x \; + \;
n \sum_{k=1}^\infty \frac{B_{2k}}{(2k+n)(2k)!} \; x^{2k} \; ,
$$
in which $B_{2k}$ are Bernoulli numbers. At large $x$ and ${\rm Re}\; n > 0$,
one has
$$
D(n,x) \simeq \frac{C_n}{x^n} \qquad ( x\rightarrow \infty , \; {\rm Re}\; n>0 ) \; ,
$$
where
$$
C_n \equiv n \Gamma(n+1) \zeta(n+1) \; .
$$

Below, we consider the case of $n = 3$, corresponding to the Debye function
$$
D(x) \equiv D(3,x) = \frac{3}{x^3} \int_0^x \frac{t^3}{e^t-1} \; dt \; .
$$
The small-variable expansion for the latter takes the form
$$
D(x) \simeq 1 - \; \frac{3}{8} \; x + \sum_{k=1}^\infty a_{2k} x^{2k}
\qquad ( x \rightarrow 0 ) \; ,
$$
in which
$$
a_{2k} = \frac{3 B_{2k}}{(2k+3)(2k)!} \; .
$$
While the large-variable behavior is given by the expression
$$
D(x) \simeq \frac{C_3}{x^3} \qquad ( x \rightarrow \infty) \; ,
$$
with
$$
C_3 = \frac{\pi^4}{5} = 19.481818 \; .
$$

Constructing the root approximant
$$
D_5^*(x) = \left ( \left ( \left ( \left ( \left ( 1 + A_1 x
\right )^2 + A_2 x^2 \right )^{3/2} + A_3 x^3 \right )^{4/3} +
A_4 x^4 \right )^{5/4} + A_5 x^5 \right )^{-3/5} \; ,
$$
we compare it with the exact numerical values of the function $D(x)$ and find that $D^*_5(x)$ approximates well this function in the whole region of
$x \in [0,\infty]$, with the maximal error of $15 \%$ at $x = 5$. The best two-point Pad\'{e} approximant of the same order, $P_{1/4}(x)$, is less accurate, yielding the maximal error of $33 \%$ at $x = 15$.

\subsection{Fermi-Dirac integral}

The general form of the $j$-th order Fermi-Dirac integral is
$$
F(j,x) = \frac{1}{\Gamma(j+1)} \int_0^\infty \frac{t^j}{e^{t-x}+1} \; dt \; .
$$
Its asymptotic expansions are known \cite{Dingle_14}.

For concreteness, let us consider the zero-order case that reduces to the function
$$
F(x) \equiv F(0,x) = \ln \left ( 1 + e^x \right ) \; .
$$
At small $x$, this function tends to $\ln 2$, and at large $x$, we have
$$
F(x) \simeq x \qquad ( x \rightarrow \infty ) \; .
$$

The root approximant
$$
F_5^*(x) = \ln 2 \left ( \left ( \left ( \left ( \left ( 1 + A_1 x
\right )^2 + A_2 x^2 \right )^{3/2} + A_3 x^3 \right )^{4/3} +
A_4 x^4 \right )^{5/4} + A_5 x^5 \right )^{1/5} \; ,
$$
where
$$
A_1 = 0.721348 \; , \qquad A_2 = 0.360674 \; , \qquad A_3 = 0.390257 \; ,
$$
$$
A_4 = 0.410334 \; , \qquad A_5 = 4.294519 \; ,
$$
provides an accurate approximation for the function $F(x)$ in the whole region of $x \in [0,\infty]$, the maximal error being $5 \%$. The two-point Pad\'{e} approximant $P_{3/2}(x)$ is slightly less accurate, with the maximal error of $6 \%$.

\subsection{Fekete-Szeg\"o problem}

The problem of maximizing the absolute value of a functional in subclasses of normalized functions is called the Fekete-Szeg\"o problem
\cite{Fekete_15,Dziok_16}. The Fekete-Szeg\"o functional is bounded by the function
$$
f(x) = 1 + 2 \exp \left ( - \; \frac{2x}{1-x} \right ) \; ,
$$
where $0 < x < 1$.

In order to consider the interval $[0, \infty]$, as in other examples, we can use the change of the variable
$$
x = \frac{z}{1+z} \; , \qquad z = \frac{x}{1-x} \; .
$$
Then $z \rightarrow \infty$ as $x \rightarrow 1$. Expanding $F(z)$ at small $z$ gives
$$
F(z) \equiv f(x(z)) \simeq 3 - 4z + 4z^2 -\; \frac{8}{3} \; z^3 +
\frac{4}{3} \; z^4 \; - \; \frac{8}{15} \; z^5 \; .
$$
The root approximant $F^*_3(z)$ uniformly approximates the function $F(z)$
on the interval $z \in [0,\infty]$, with the maximal error about $10 \%$.
The two-point Pad\'{e} approximant $P_{2/2}(z)$ is worse, having the maximal error twice larger than the root approximant $F^*_3(z)$.

\section{Some useful tricks}

It is important to mention some tricks allowing for the convenient use of the method. Below, we discuss the interchange of small-variable and large-variable limits and the problem of dealing with logarithms.

\subsection{Inversion of expansions}

In the above examples, we have considered functions, whose expansions are better known for the small-variable limit, while a few, or just a single term, are available in the large-variable limit. But generally, the small-variable and large-variable limits are interchangeable. In those cases, when the large-variable expansion in powers of $1/x$ provides a number of terms and this expansion enjoys better convergence properties,
it is possible to invert the small-variable limit into the large-variable limit by using the variable change $x = 1/t$. Then, instead of the function
$f(x)$, we consider the function
\begin{equation}
\label{11}
F(t) \equiv f \left ( \frac{1}{t} \right ) \; , \qquad t = \frac{1}{x} \; .
\end{equation}

The small-variable limit (1) becomes the large-variable limit
\begin{equation}
\label{12}
F(t) \simeq F^{(k)}(t) \equiv f_k\left ( \frac{1}{t} \right ) \qquad
( t \rightarrow \infty ) \; ,
\end{equation} in which
\begin{equation}
\label{13}
F^{(k)}(t) = F_\infty(t) \left ( 1 +
\sum_{n=1}^k \frac{a_n}{t^n} \right ) \; ,
\end{equation} with
$$
F_\infty(t) \equiv f_0\left ( \frac{1}{t} \right ) = A t^{-\alpha} \; .
$$

Conversely, the large-variable behavior (4) transforms to the small-variable behavior
\begin{equation}
\label{14}
F(t) \simeq F_p(t) \equiv f^{(p)} \left ( \frac{1}{t} \right ) \qquad
( t \rightarrow 0 ) \; ,
\end{equation} in which
\begin{equation}
\label{15}
F_p(t) = F_0(t) \left ( 1 + \sum_{n=1}^p b_n t^n \right ) \; ,
\end{equation} where
$$
F_0(t) \equiv f_\infty \left ( \frac{1}{t} \right ) = B t^{-\beta} \; .
$$

After this change of the variable it is straightforward to employ the same procedure of constructing the root approximants, as is explained in Sec. 2.

More generally, it is possible to use the change of the variable $t = 1/x^s$,
with a positive power $s > 0$, so that again $t \rightarrow \infty$, when $x \rightarrow 0$.

\subsection{Example of inversion}

As an illustration of the inversion procedure, we give below a typical example,
discussing it rather briefly, since the whole method of constructing the root approximants is the same as before.

Let us consider the partition function of the so-called zero-dimensional oscillator, or the generating functional of zero-dimensional $\varphi^4$ field theory, which is defined through the integral
$$
I(x) = \frac{1}{\sqrt{\pi}} \int_{-\infty}^\infty \exp \left ( - \varphi^2
- x \varphi^4 \right ) \; d\varphi \; ,
$$
where $x$ plays the role of a coupling parameter. In the weak-coupling limit,
one has \cite{Yukalov_17} the asymptotic expansion
$$
I(x) \simeq 1 + \sum_{n=1}^\infty a_n x^n \qquad ( x \rightarrow 0 ) \; ,
$$
in which the coefficients are
$$
a_n = \frac{(-1)^n}{\sqrt{\pi}\; n!} \;
\Gamma\left ( 2n + \frac{1}{2} \right ) \; .
$$
For instance
$$
a_1 = -\; \frac{3}{4} \; , \qquad a_2 = \frac{105}{32} \; , \qquad a_3 = - \; \frac{3465}{128} \; ,
$$
and so on.

The strong-coupling expansion reads as
$$
I(x) \simeq 1.022765 \; x^{-1/4} - 0.345684\; x^{-3/4} +
0.127846 \; x^{-5/4} \qquad ( x \rightarrow \infty) \; .
$$
Here the strong-coupling expansion provides a number of terms. Moreover,
the absolute values of the coefficients in this expansion diminish with increasing order, contrary to the coefficients $a_n$ in the weak-coupling expansion, which grow as $n^n$ with increasing order $n$. This makes the strong-coupling expansion more suitable for constructing root approximants.

Resorting to the change of the variable $x = 1/t^4$, we consider the function
$J(t) \equiv I(1/t^4)$ and follow the scheme of the previous section. We define the root approximants $J^*_k(t)$ that give us the approximants
$I^*_k(x) = J^*_k(1/x^{1/4})$ for the sought function. The approximant $I^*_3(x)$ found in this way has the maximal error of $5 \%$ for the whole range of $x \in [0, \infty]$. For comparison, the Pad\'{e} approximant $P_{1/2}(x)$
has the maximal error of about $20 \%$, which is much less accurate.

\subsection{Dealing with logarithms}

It is worth paying attention to the problem of series involving logarithms,
which often appear in physics applications. Such series do not yield any complication for the method of root approximants described here. There are two equivalent ways of treating such series. Thus, if a series contains the terms with $x^n$, $x^{n+1}$, and with $x^n \ln x$, then it is admissible to consider as the terms of one order either those containing
$x^n$ and $x^n \ln x$ or the terms $x^{n+1}$ and $x^n \ln x$.

As an illustration, let us consider, e.g., the typical form of such a series involving logarithms as that one arising in the Nambu--Jona-Lasinio model
\cite{Kunihiro_18} and leading to the function
$$
f(x) = x \left [ \sqrt{1 + x^2} \; - \; x^2 \ln\left (
\frac{1+\sqrt{1+x^2}}{x} \right ) \right ] \; ,
$$
where $x$ plays the role of mass. At asymptotically small $x$, it follows
$$
f(x) \simeq x + \left ( \frac{1}{2} - \ln 2 + \ln x \right ) x^3 \qquad
(x \rightarrow 0 ) \; .
$$
While at large $x$, one has
$$
f(x) \simeq \frac{2}{3} - \frac{1}{5x^2} + \frac{3}{28x^4} \qquad
( x \rightarrow \infty ) \; .
$$
Keeping in mind the dependence of the last expansion on $1/x^2$, it is convenient to use the variable $z = 1/x^2$. The root approximant, satisfying the required limits, has the form
$$
f_4^*(x) = \frac{2}{3} \left ( \left ( \left ( 1 + A_1 z \right )^2 +
A_2 z^2 \right )^{3/2} + A_3 z^2 \ln ( 1 + z ) + A_4 z^3 \right )^{-1/6} \; ,
$$
with all parameters uniquely defined by the given expansions. This expression approximates well the initial function $f(x)$, with the maximal error of $2 \%$
at $x \approx 2$. Contrary to this, the best Pad\'{e} approximant of the same order has the error of $11 \%$ at $x \approx 1.5$.

\section{Ground-state energy of electron gas}

Important and not trivial problems arise when studying the properties of charged systems \cite{Loos_19,Cioslowski_20,Cioslowski_21}. Here we show how our method works for the case of homogeneous electron systems.

\subsection{One-dimensional electron gas}

The Hartree-Fock part of the uniform electron energy is well known. The problem arises in calculating the {\it correlation energy}. The latter is usually presented in a reduced dimensionless form $\varepsilon(r_s)$ as a function of the Seitz radius $r_s$. High-density expansion for one-dimensional uniform electron gas \cite{Loos_22} corresponds to small
$r_s$, when for the correlation energy one has
$$
\varepsilon(r_s) \simeq C + 0.00845 r_s \qquad (r_s \rightarrow 0 ) \; ,
$$
where
$$
C = -\; \frac{\pi^2}{360} = - 0.027416 \; .
$$
The low-density expansion \cite{Loos_22} implies large $r_s$, when
$$
\varepsilon(r_s) \simeq \frac{b_1}{r_s} + \frac{b_2}{r_s^{3/2}} \qquad
(r_s \rightarrow \infty) \; ,
$$
where
$$
b_1 = - \left ( \ln \sqrt{2\pi} \; - \; \frac{3}{4} \right ) =
-0.168939 \; , \qquad b_2 = 0.359933 \; .
$$

The root approximant, enjoying the same expansions, but valid for arbitrary
$r_s$ reads as
$$
\varepsilon_3^*(r_s) = -\; \frac{\pi^2}{360}\; \left ( \left ( ( 1+ A_1 r_s)^{3/2}
+ A_2 r_s^2 \right )^{5/4} + A_3 r_s^3 \right )^{-1/3} \; ,
$$
with the parameters
$$
A_1 = 0.493150 \; , \qquad A_2 = 0.056122 \; , \qquad A_3 = 0.004274 \; .
$$
Comparing the prediction of the root approximant with the data from diffusion Monte Carlo calculations \cite{Loos_22} in the interval $0 < r_s < 20$, we find that the maximal error of $\varepsilon^*_3$ is $8\%$. Pad\'{e}
approximants give the errors between $2\%$ and $10\%$. Thus,
$P_{1/2}(\sqrt{r_s})$ has the error of $2\%$, while $P_{0/3}(\sqrt{r_s})$ has the maximal error of $10\%$. The Cioslowski interpolation method
\cite{Cioslowski_23} results \cite{Loos_22} in a better accuracy of $1\%$.
However, this method includes an additional parameter that is fitted from numerical Monte Carlo calculations, while our aim has been to construct good approximations without fitting parameters, based only on asymptotic expansions. Avoiding fitting parameters is crucial for those problems where no exact numerical data are available.

\subsection{Two-dimensional electron gas}

Correlation energy of a homogeneous two-dimensional electron gas was studied in several articles, e.g., in Refs.
\cite{Sim_24,Tanatar_25,Kwon_26,Attaccalite_27,Gori_28,Constantin_29,
Drummond_30,Loos_31}. In the high-density limit (small $r_s$), the ground-state energy reads \cite{Loos_31} as
$$
E_0(r_s) \simeq \frac{c_{-2}}{r_s^2} + \frac{c_{-1}}{r_s} +
\varepsilon(r_s) \qquad (r_s \rightarrow 0) \; ,
$$
where the first two terms constitute the Hartree-Fock energy, with
$$
c_{-2} = \frac{1}{2} \; , \qquad c_{-1} = -\; \frac{4\sqrt{2}}{3\pi} \; .
$$
And the last term is the correlation energy
$$
\varepsilon(r_s) \simeq c_0 + c_1' r_s \ln r_s \qquad (r_s \rightarrow 0) \; ,
$$
with the coefficients
$$
c_0 = -0.192495 \; , \qquad c_1' = -\sqrt{2} \left ( \frac{10}{3\pi}
- 1 \right ) = - 0.0863136 \; .
$$

In the low-density limit (large $r_s$) the asymptotic expansion for the correlation energy can be written \cite{Kwon_26} as
$$
\varepsilon(r_s) \simeq \frac{b_1}{r_s} + \frac{b_2}{r_s^{3/2}} +
\frac{b_3}{r_s^2} \qquad (r_s \rightarrow \infty) \; ,
$$
where
$$
b_1 = -0.472189 \; , \qquad b_2 = 0.4964 \; , \qquad b_3 = 0.5297 \; .
$$

For intermediate $r_s$, there have been suggested
\cite{Attaccalite_27,Gori_28,Drummond_30} several phenomenological expressions with parameters fitted from Monte Carlo calculations. Thus,
Gori-Giorgi et al. \cite{Gori_28} suggested the form
$$
\varepsilon(r_s) = A_0 + \left ( B_0 r_s + C_0 r_s^2 + D_0 r_s^3
\right ) \; \ln \left ( 1 +
\frac{1}{E_0 r_s + F_0 r_s^{3/2}+G_0 r_s^2 +H_0 r_s^3}
\right ) \; ,
$$
with the parameters
$$
A_0 = - 0.1925 \; , \qquad B_0 = 0.0863136 \; , \qquad C_0 = 0.057234 \; , \qquad D_0 = 0.003362896 \; ,
$$
$$
E_0 = 1.0022 \; , \qquad F_0 = -0.02069 \; , \qquad G_0 = 0.34 \; , \qquad H_0 = 0.01747 \; .
$$
This expression can be used as a numerical result for estimating the accuracy of approximate analytic formulas.

The root approximant, satisfying all asymptotic expansions reads as
$$
\varepsilon_5^*(r_s) = \frac{b_1}{r_s} \left ( \left ( \left ( 1 +
\frac{A_1}{\sqrt{r_s}} \right )^2 + \frac{A_2}{r_s} \right )^{3/2}
+ \frac{A_3}{r_s}\; \ln \left ( 1 + \frac{1}{\sqrt{r_s}} \right )
+ \frac{A_4}{r_s^{3/2}} + \frac{A_5}{r_s^2} \right )^{-1/2} \; ,
$$
where the parameters are
$$
b_1 = - 0.472189 \; , \qquad A_1 = 0.700849 \; , \qquad A_2 = 2.723702 \; ,
$$
$$
A_3 = 10.792193 \; , \qquad A_4 = -5.764339 \; , \qquad A_5 = 6.017150 \; .
$$
The error of this approximant is about $5\%$.

\section{Systems with spherical symmetry}

Finite quantum systems often enjoy spherical symmetry. Below, we consider two examples of such systems that are important for applications.

\subsection{Energy of harmonium atoms}

An $N$-electron harmonium atom is described by the Hamiltonian
$$
\hat H =
\frac{1}{2} \sum_{i=1}^N \left ( - \nabla_i^2 + \omega^2 r_i^2 \right ) +
\frac{1}{2} \sum_{i\neq j}^N \frac{1}{r_{ij}} \; ,
$$
where dimensionless units are employed and
$$
r_i \equiv | {\bf r}_{i} | \; , \qquad r_{ij} \equiv | {\bf r}_i - {\bf r}_j | \; .
$$
This Hamiltonian provides a rather realistic modeling of trapped ions,
quantum dots, and some other finite systems, such as atomic nuclei and metallic grains \cite{Birman_32}. This is why the energy of harmonium atoms has been intensively studied
\cite{Cioslowski_33,Cioslowski_34,Cioslowski_35,Cioslowski_36,Cioslowski_37}.
Here we show that root approximants give a good approximation for the energy of such systems. We consider the ground-state energy of a two-electron harmonium.

At a shallow harmonic potential, the energy can be expanded
\cite{Cioslowski_23} in powers of $\omega$, so that
$$
E(\omega) \simeq E_k(\omega) \qquad (\omega\rightarrow 0) \; ,
$$
with the truncated series
$$
E_k(\omega) = \sum_{n=0}^k c_n \omega^{(2+n)/3} \; .
$$
For instance, to third order, we get
$$
E_3(\omega) = c_0 \omega^{2/3} + c_1 \omega + c_2 \omega^{4/3} \; ,
$$
with the coefficients
$$
c_0 = \frac{3}{2^{4/3}} = 1.19055 \; , \qquad c_1 = \frac{1}{2} \; \left ( 3 + \sqrt{3} \right ) = 2.36603 \; ,
\qquad c_2 = \frac{7}{36}\; 2^{-2/3} = 0.122492 \; .
$$

And for a rigid potential, the energy is approximated \cite{Cioslowski_23}
as
$$
E(\omega) \simeq E^{(p)}(\omega) \qquad (\omega\rightarrow\infty) \; ,
$$
where
$$
E^{(p)}(\omega) = \sum_{n=0}^p b_n \omega^{(2-n)/2} \; .
$$
To fourth order, one has
$$
E^{(4)}(\omega) = b_0 \omega + b_1 \omega^{1/2} + b_2 + b_3 \omega^{-1/2} \; ,
$$
where
$$
b_0 = 3 \; , \qquad b_1 = \sqrt{\frac{2}{\pi} } = 0.797885 \; , \qquad b_2 = -\;\frac{2}{\pi}\; \left ( 1 - \; \frac{\pi}{2} +
\ln 2 \right ) = - 0.077891 \; ,
$$
$$
b_3 = \left ( \frac{2}{\pi}\right )^{3/2}\; \left [ 2 - 2G - \;
\frac{3}{2}\; \pi + ( \pi + 3) \ln 2 + \frac{3}{2}\; ( \ln 2)^2 - \;
\frac{\pi^2}{24} \right ] = 0.0112528 \; ,
$$
with the Catalan constant
$$
G \equiv \sum_{n=0}^\infty \frac{(-1)^n}{(2n+1)^2} =
0.91596559 \; .
$$

The root approximant, respecting all given small-$\omega$, as well as large-$\omega$ expansions, is
$$
E_6^*(\omega) = c_0 \omega^{2/3} \left ( \left ( \left ( \left ( \left (
\left ( 1 + A_1 \omega^{1/3} \right )^{1/2} + A_2 \omega^{2/3} \right )^{3/4}
+ A_3 \omega \right )^{5/6} + A_4 \omega^{4/3} \right )^{7/8} + \right. \right.
$$
$$
\left. \left. +
A_5 \omega^{5/3} \right )^{9/10} + A_6 \omega^2 \right )^{1/6} \; ,
$$
with the parameters
$$
c_0 = 1.19055 \; , \qquad A_1 = 48.4532 \; , \qquad A_2 = 564.108 \; ,
$$
$$
A_3 = 1088.39 \; , \qquad A_4 = 1221.08 \; , \qquad A_5 = 796.791 \; , \qquad A_6 = 256 \; .
$$
We estimate the accuracy of the root approximant comparing it with the numerical data from Ref. \cite{Matito_38} and find that its maximal error is only $0.9 \%$. Note that Pad\'{e} approximants cannot be used in the case of harmonium, since the small-variable and large-variable asymptotic expansions are incompatible.

\subsection{Energy of two-electron spherium}

The two-electron spherium is a system consisting of two electrons that are confined to the surface of a sphere of radius $R$. The ground-state energy of the system \cite{Cioslowski_23,Loos_39} possesses the small-radius expansion
$$
E(R) \simeq \frac{1}{R} + c_0 + c_1 R + c_2 R^2 + c_3 R^3 \qquad
(R \rightarrow 0) \; ,
$$
in which
$$
c_0 = 4\ln 2 - 3 = - 0.22741128 \; , \qquad c_1 = 8(\ln 2)^2 - 40 \ln 2 + 24 = 0.11773689 \; ,
$$
$$
c_2 = -0.05027560 \; , \qquad c_3 = 0.01395783 \; .
$$
The coefficients $c_2$ and $c_3$ can also be expressed in closed forms that,
however, are too cumbersome \cite{Loos_39}, because of which we give here only their numerical values.

In the large-radius limit, the energy has the expansion
$$
E(R) \simeq \frac{1}{2R} + \frac{1}{2R^{3/2}} \; - \; \frac{1}{8R^2} \;
- \; \frac{1}{128 R^{5/2}} \qquad (R \rightarrow \infty ) \; .
$$

The root approximant can be written in the form
$$
E_5^*(R) = \frac{1}{R} + c_0 \left ( \left ( \left (
\left ( ( 1 + A_1 R)^{3/2} + A_2 R^2 \right )^{5/4} + A_3 R^3 \right )^{7/6}
+ A_4 R^4 \right )^{9/8} + A_5 R^5 \right )^{-1/5} \; ,
$$
where
$$
A_1 = 1.05188915 \; , \qquad A_2 = 0.56453530 \; , \qquad A_3 = 0.36000617 \; ,
$$
$$
A_4 = 0.12606787 \; , \qquad A_5 = 0.01946301 \; .
$$
Comparing this expression with numerical data \cite{Loos_39}, we find that the maximal error occurs at $R = 20$, being only $0.1 \%$. The best Pad\'{e}
approximant $P_{5/5}(\sqrt{R})$ is much less accurate, having the maximal error, also at $R = 20$, but an order larger, $1.5 \%$.

\section{Discussion}

We have described a simple and general method for interpolating functions between their small-variable and large-variable asymptotic expansions. The method is based on the construction of self-similar root approximants enjoying the general form
$$
f^*_k(x) = f_0(x) \left ( \left ( \left ( \ldots ( 1 + A_1 x )^{n_1} +
A_2 x^2 \right )^{n_2} + A_3 x^3 \right )^{n_3} + \ldots +
A_k x^k \right )^{n_k} \; .
$$
All parameters $A_i$ can be uniquely defined through the corresponding asymptotic expansions. By changing the variable, it is easy to inverse the expansions between the small-variable and large-variable limits.

Our aim has been to suggest a method that would involve no fitting parameters. This is especially important in those complicated cases, where numerical data in the whole region of the variable are not available. The absence of fitting parameters makes our approach different from other interpolation methods, such as the Cioslowski method \cite{Cioslowski_23}.

We have demonstrated the method of root approximants by several examples,
whose structure is typical for many applications, including the hard-core scattering problem, Debye function, Fermi-Dirac integral, Fekete-Szeg\"{o}
problem, zero-dimensional oscillator, homogeneous electron gas, harmonium atom, and spherium.

We have analyzed several more problems, e.g., the interpolation of the polaron mass between weak-coupling and strong-coupling limits studied earlier by the Feynman variational procedure \cite{Feynman_39} and by other methods
\cite{Feranchuk_40,Alexandrou_41,Kleinert_42,Kornilovitch_43}. Our approach provides approximations, whose accuracy is comparable to or better than that of other methods, being at the same time simpler.

Generally, the suggested method provides the accuracy not worse than the method of Pad\'{e} approximants and in the majority of cases is more accurate than the latter.

Besides the root approximants of the general form (9), we also have considered
{\it additive approximants} represented by the sums
$$
f^*_{M/N}(x) = \sum_{i=1}^{(M+N)/2} A_i ( 1 + B_i x)^{n_i} \; .
$$
This type of expressions can be considered either as additive root approximants or an additive variant resulting from self-similar factor approximants
\cite{Gluzman_44}.

For example, in the case of one-dimensional electron gas, the correlation energy is approximated as
$$
\varepsilon^*_{2/2}(r_s) = A_1 ( 1 + B_1 r_s )^{-1} +
A_2 ( 1 + B_2 r_s )^{-3/2} \; ,
$$
with the parameters
$$
A_1 = -0.044941 \; , \qquad A_2 = 0.017526 \; , \qquad B_1 = 0.266023 \; , \qquad B_2 = 0.133344 \; .
$$
This expression has the maximal error of $11\%$. However, a more detailed analysis of such additive approximants requires a separate investigation,
which is out of the scope of the present paper.

\vskip 2cm

\acknowledgments{Acknowledgments}

One of the authors (V.I.Y.) acknowledges financial support from the Russian Foundation for Basic Research (grant 14-02-00723) and is grateful to E.P. Yukalova for useful discussions.

\conflictofinterests{Conflicts of Interest}

The authors declare no conflict of interest.

\newpage

\end{document}
\title{
Projection method and new formulation of leading-order anisotropic hydrodynamics
}

\begin{abstract}
The projection method introduced earlier for boost-invariant and cylindrically symmetric systems is used to introduce a new formulation of anisotropic hydrodynamics that allows for three substantially different values of pressure acting locally in three different directions. Our considerations are based on the Boltzmann kinetic equation with the collision term treated in the relaxation time approximation and the momentum anisotropy is included explicitly in the leading term of the distribution function. A novel feature of our work is the complete analysis of the second moment of the Boltzmann equation, in addition to the zeroth and first moments that have been analyzed in earlier studies. We define the final equations of anisotropic hydrodynamics in the leading order as a subset of the analyzed moment equations (and their linear combinations) which agree with the Israel-Stewart theory in the case of small pressure anisotropies.
\end{abstract}

\section{Introduction}
\label{sect:intro}

Successful applications of relativistic viscous hydrodynamics in the description of heavy-ion collisions at RHIC (Relativistic Heavy-Ion Collider) and the LHC (Large Hadron Collider) triggered large interest in the development of the hydrodynamic framework
\cite{Israel:1976tn,Israel:1979wp,
Muronga:2001zk,Muronga:2003ta,
Baier:2006um,Baier:2007ix,
Romatschke:2007mq,Dusling:2007gi,Luzum:2008cw,
Song:2008hj,El:2009vj,PeraltaRamos:2010je,
Denicol:2010tr,Denicol:2010xn,
Schenke:2010rr,Schenke:2011tv,
Bozek:2009dw,Bozek:2011wa,
Niemi:2011ix,Niemi:2012ry,
Bozek:2012qs,Denicol:2012cn,Jaiswal:2013npa}. An example of the new approach to relativistic dissipative hydrodynamics is {\it anisotropic hydrodynamics} \cite{Florkowski:2010cf,Martinez:2010sc,
Ryblewski:2010bs,Martinez:2010sd,
Ryblewski:2011aq,Martinez:2012tu,
Ryblewski:2012rr,Ryblewski:2013jsa,
Florkowski:2012ax,Florkowski:2012as} --- the framework where effects connected with the expected high pressure anisotropy of the produced matter are included in the leading order of the hydrodynamic expansion. Very recently, also the second order anisotropic hydrodynamics has been formulated by Bazow, Heinz, and Strickland~\cite{Bazow:2013ifa}. The~new approach introduced in \cite{Bazow:2013ifa} allows for description of arbitrary transverse expansion of matter in the way which becomes consistent with more traditional approaches to dissipative hydrodynamics in the small anisotropy limit. This formalism uses, however, the Romatschke-Strickland form \cite{Romatschke:2003ms} of the distribution function in the leading order, which implies that the two components of pressure in the transverse plane may be different only if the second-order corrections are taken into account.

In this work we present a new methodology for including three substantially different pressure components already in the leading order of hydrodynamic expansion. Our approach is based on the projection method introduced in Ref.~\cite{Florkowski:2011jg}, which has turned out to be a convenient tool to replace complicated tensor equations of relativistic hydrodynamics by a small set of scalar equations. We take into account the radial expansion of the produced matter (in addition to the longitudinal Bjorken flow) but our considerations are confined to the case with cylindrical symmetry. We generalize the Romatschke-Strickland form to the case where all three pressure components may be different. Compared to earlier works on anisotropic hydrodynamics in the leading order, where the zeroth and first moments of the Boltzmann equation have been studied, an important novel feature of our present work is the analysis of the second moment of the Boltzmann equation. We argue that a successful agreement with the Israel-Stewart theory in the limit of small anisotropies may be achieved if we take into account two equations constructed from the second moment of the Boltzmann equation rather than taking one equation from the zeroth moment and another equation from the second moment.

In our opinion, the use of the second moment sheds new light on the framework of anisotropic hydrodynamics. We expect that the formalism developed in this paper may be a better starting point for the second-order anisotropic hydrodynamics developed according to the guidelines presented in Ref.~\cite{Bazow:2013ifa}. In addition, the presented approach may be generalized in a natural way to the 2+1 case where the cylindrical symmetry is relaxed.

The paper is organized as follows: In the next Section we introduce the four-vectors $U$, $X$, $Y$, and $Z$ used to decompose different tensors used in our formalism, in particular, to decompose the expansion and shear tensors. In Sec.~\ref{sect:BE} we discuss the Boltzmann equation in the relaxation time approximation and introduce the anisotropic distribution function characterized by three anisotropy parameters. The zeroth moment of the Boltzmann equation is discussed shortly in Sec.~\ref{sect:0mom}. In Sec.~\ref{sect:1mom} we characterize the energy-momentum conservation law, the Landau matching condition, and the close-to-equilibrium limit of the energy-momentum tensor. The formulas for the energy-density and pressure of anisotropic systems are presented in Sec.~\ref{sect:enedenaniso}. Sec.~\ref{sect:2mom} contains the analysis of the second moment of the Boltzmann equation. The most important part of the paper, Sec.~\ref{sect:set}, describes the construction of two equations (out of the complete set of second moment equations) which are finally accepted as the two new equations of anisotropic hydrodynamics in the leading order. The entropy production and its positivity are discussed in Sec.~\ref{sect:ent}. We summarize and conclude in Sec.~\ref{sect:con}. Two appendices containing explicit forms of different expressions and integrals close the paper. Throughout the paper we use natural units where $c=\hbar=k_B=1$ and the metric tensor with the signature $(+,-,-,-)$.

\section{Projection method for boost-invariant and cylindrically symmetric hydrodynamic systems}
\label{sect:projection}

\subsection{Boost-invariant and cylindrically symmetric flow}
\label{sect:flow}

The space-time coordinates and the four-vector describing the hydrodynamic flow are denoted in the standard way as
$x^\mu = \left( t, x, y, z \right)$ and
\begin{equation}
U^\mu = \gamma (1, v_x, v_y, v_z), \quad \gamma = (1-v^2)^{-1/2}.
\label{Umu0}
\end{equation}
For boost-invariant and cylindrically symmetric systems, the scalar quantities may depend only on the (longitudinal) proper time and the radial distance
\begin{equation}
\tau = \sqrt{t^2 - z^2}, \quad r = \sqrt{x^2 + y^2}.
\label{taur}
\end{equation}
In addition, for the boost-invariant hydrodynamic flow (\ref{Umu0}) we may use the following parametrization
\begin{eqnarray}
U^0 = \cosh \theta_\perp \cosh \eta_\parallel, \quad U^1 = \sinh \theta_\perp \cos \phi,
\quad U^2 = \sinh \theta_\perp \sin \phi, \quad U^3 = \cosh \theta_\perp \sinh \eta_\parallel,
\label{Umu}
\end{eqnarray}
where $\theta_\perp=\theta_\perp(\tau,r)$ is the transverse fluid rapidity defined by the formula
\begin{equation}
v_\perp = \sqrt{v_x^2+v_y^2} = \frac{\tanh \theta_\perp}{\cosh\eta_\parallel}.
\label{thetaperp}
\end{equation}
Here $\eta_\parallel$ is the space-time rapidity and $\phi$ is the azimuthal angle
\begin{eqnarray}
\eta_\parallel = \frac{1}{2} \ln \frac{t+z}{t-z},
\quad \phi = \arctan \frac{y}{x}.
\label{etaparphi}
\end{eqnarray}

In addition to $U^\mu$ we define three other four-vectors. The first one, $Z^\mu$, defines the longitudinal direction that plays a special role due to the initial geometry of the collision,
\begin{eqnarray}
Z^0 = \sinh \eta_\parallel, \quad Z^1 = 0, \quad Z^2 = 0, \quad Z^3 = \cosh \eta_\parallel.
\label{Zmu}
\end{eqnarray}
The second four-vector, $X^\mu$, defines a transverse direction to the beam,
\begin{eqnarray}
X^0 = \sinh \theta_\perp \cosh \eta_\parallel, \quad X^1 = \cosh \theta_\perp \cos \phi, \quad X^2 = \cosh \theta_\perp \sin \phi, \quad X^3 = \sinh \theta_\perp \sinh \eta_\parallel,
\label{Xmu}
\end{eqnarray}
while the third four-vector, $Y^\mu$, defines the second transverse direction,
\begin{eqnarray}
Y^0 = 0, \quad Y^1 = -\sin \phi, \quad Y^2 = \cos \phi, \quad Y^3 = 0.
\label{Ymu}
\end{eqnarray}

The four-vector $U^\mu$ is time-like, while the four-vectors $Z^\mu, X^\mu, Y^\mu$ are space-like. In addition, they are all orthogonal to each other,
\begin{eqnarray}
U^2 &=& 1, \quad Z^2 = X^2 = Y^2 = -1, \nonumber \\
U \cdot Z &=& 0, \quad U \cdot X = 0, \quad U \cdot Y = 0, \nonumber \\
Z \cdot X &=& 0, \quad Z \cdot Y = 0, \quad X \cdot Y = 0.
\label{norm}
\end{eqnarray}
All these properties are most easily seen in the {\it local rest frame} of the fluid element (LRF), where we have \mbox{$\theta_\perp = \eta_\parallel = \phi = 0$} and
\begin{eqnarray}
U = (1,0,0,0), \quad Z = (0,0,0,1), \quad X = (0,1,0,0), \quad Y = (0,0,1,0).
\label{LRF}
\end{eqnarray}

In the standard formalism of dissipative hydrodynamics one uses the operator $ \Delta^{\mu \nu} = g^{\mu \nu} - U^\mu U^\nu$, that projects on the three-dimensional space orthogonal to $U^\mu$. It can be shown that
\begin{equation}
\Delta^{\mu \nu} = g^{\mu \nu} - U^\mu U^\nu = -X^\mu X^\nu - Y^\mu Y^\nu - Z^\mu Z^\nu.
\label{Delta}
\end{equation}
Using Eqs. (\ref{norm}) we find that $Z^\mu, X^\mu$ and $Y^\mu$ are the eigenvectors of $\Delta^{\mu \nu}$,
\begin{equation}
\Delta^{\mu}_{\,\, \nu} \,X^\nu = X^\mu, \quad \Delta^{\mu}_{\,\, \nu} \,Y^\nu = Y^\mu, \quad
\Delta^{\mu}_{\,\, \nu} \,Z^\nu = Z^\mu.
\label{eigen}
\end{equation}
In this work, following the method of Ref.~\cite{Florkowski:2011jg}, we use the tensor products of the four-vectors $U, X, Y$, and $Z$ as the basis to decompose all other tensors appearing in the formalism of standard dissipative hydrodynamics and anisotropic hydrodynamics. This allows us to replace complicated tensor equations by a set of scalar equations and to identify the key degrees of freedom in anisotropic hydrodynamics. Various formulas and identities satisfied by the four-vectors $U, X, Y$, and $Z$, and also by their derivatives are listed in Sec.~\ref{sect:explicitr}. We shall refer frequently to those expressions in this paper.

\subsection{Expansion and shear tensors}
\label{sect:expandshear}

For the sake of convenience, we present now explicit forms of the expansion and shear tensors expressing them in terms of $X$, $Y$ and $Z$. In the general case, the expansion tensor is defined by the formula~\cite{Muronga:2003ta}
\begin{equation}
\theta_{\mu \nu} = \Delta^\alpha_\mu \Delta^\beta_\nu \partial_{(\beta} U_{\alpha)},
\label{theta-munu}
\end{equation}
where the brackets denote the symmetric part of $\partial_{\beta} U_{\alpha}$. Using Eqs. (\ref{Umu}) in the definition of the expansion tensor (\ref{theta-munu}) and also using Eqs. (\ref{Zmu})--(\ref{Ymu}), we find that the following decomposition holds for boost-invariant and cylindrically symmetric systems \cite{Florkowski:2011jg}~\footnote{We stress that the subscripts $X$, $Y$, and $Z$ do not denote the Cartesian coordinates but refer typically to the coefficients in the decompositions such as Eq.~(\ref{theta-dec}).}
\begin{equation}
\theta^{\mu \nu} = \theta_X X^\mu X^\nu + \theta_Y Y^\mu Y^\nu + \theta_Z Z^\mu Z^\nu,
\label{theta-dec}
\end{equation}
where
\begin{equation}
\theta_X = - \frac{\partial \theta_\perp}{\partial r} \cosh \theta_\perp
- \frac{\partial \theta_\perp}{\partial \tau} \sinh \theta_\perp, \quad
\theta_Y = - \frac{\sinh \theta_\perp}{r}, \quad
\theta_Z = - \frac{\cosh \theta_\perp}{\tau}.
\label{thetas}
\end{equation}
The contraction of the tensors $\Delta^{\mu \nu}$ and $\theta^{\mu \nu}$ gives the volume expansion parameter $\theta = \Delta^{\mu \nu} \theta_{\mu \nu}$. Equations~(\ref{Delta}) and (\ref{theta-dec}) yield
\begin{eqnarray}
\theta = -\theta_X - \theta_Y - \theta_Z.
\label{volexpp}
\end{eqnarray}
It is interesting to check that the volume expansion parameter $\theta$ may be expressed also by the formula $\theta = \partial_\mu U^\mu$.

In addition to the expansion tensor $\theta^{\mu\nu}$ we shall use the shear tensor $\sigma_{\mu \nu}$. The latter is defined by the formula
\begin{equation}
\sigma_{\mu \nu} = \theta_{\mu \nu} - \frac{1}{3} \Delta_{\mu \nu} \theta.
\label{sigma1}
\end{equation}
With the help of the decompositions (\ref{Delta}) and (\ref{theta-dec}) we may write
\begin{equation}
\sigma^{\mu \nu} = \sigma_X X^\mu X^\nu + \sigma_Y Y^\mu Y^\nu + \sigma_Z Z^\mu Z^\nu,
\label{sigma-dec}
\end{equation}
where
\begin{eqnarray}
\sigma_X &=& \frac{\theta}{3}+\theta_X = \frac{\cosh \theta_\perp}{3 \tau} +
\frac{\sinh\theta_\perp}{3r}
-\frac{2}{3} \frac{\partial\theta_\perp}{\partial \tau} \sinh\theta_\perp
-\frac{2}{3} \frac{\partial\theta_\perp}{\partial r} \cosh\theta_\perp , \label{sigmaX}
\end{eqnarray}
\begin{eqnarray}
\sigma_Y &=& \frac{\theta}{3}+\theta_Y = \frac{\cosh \theta_\perp}{3 \tau} -
\frac{2 \sinh\theta_\perp}{3r}
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial \tau} \sinh\theta_\perp
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial r} \cosh\theta_\perp , \label{sigmaY}
\end{eqnarray}
and
\begin{eqnarray}
\sigma_Z &=& \frac{\theta}{3}+\theta_Z
= -\frac{2\cosh \theta_\perp}{3 \tau} +
\frac{\sinh\theta_\perp}{3r}
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial \tau} \sinh\theta_\perp
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial r} \cosh\theta_\perp . \label{sigmaZ}
\end{eqnarray}
In agreement with general requirements we find that
\begin{eqnarray}
\sigma_X+\sigma_Y+\sigma_Z=0.
\label{sumsigma}
\end{eqnarray}
In the case where the radial flow is absent \mbox{$\sigma_X = \sigma_Y = 1/(3 \tau)$} and \mbox{$\sigma_Z = -2/(3 \tau)$}, which agrees with earlier findings \cite{Muronga:2003ta}. For brevity of notation, expressions such as Eqs.~(\ref{theta-dec}) or (\ref{sigma-dec}) will be written shortly as the sums
\begin{eqnarray}
\theta^{\mu\nu} = \sum_I \theta_I I^\mu I^\nu,
\qquad
\sigma^{\mu\nu} = \sum_I \sigma_I I^\mu I^\nu,\end{eqnarray}
where $I$ takes the values $X$, $Y$, and $Z$. Similarly, Eqs.~(\ref{volexpp}) and (\ref{sumsigma}) may be written as
\begin{eqnarray}
\theta = -\sum_I \theta_I, \qquad
\sum_I \sigma_I = 0.
\end{eqnarray}

\section{Boltzmann equation and anisotropic distribution functions}
\label{sect:BE}

The basis for our considerations is the Boltzmann equation treated in the relaxation time approximation
\cite{Bhatnagar:1954zz,Baym:1984np,Baym:1985tna,
Heiselberg:1995sh,Wong:1996va}
\begin{equation}
p\cdot\partial f = \frac{p\cdot U}{\tau_{\rm eq}}
\left(f_{\rm eq} - f \right).
\label{RTA}
\end{equation}
In Eq.~(\ref{RTA}) $f$ is the phase-space distribution function, $f_{\rm eq}$ is the equilibrium distribution function, and $\tau_{\rm eq}$ is the relaxation time. In different frameworks of anisotropic hydrodynamics which have been studied so far, one assumes that the distribution function $f$ is very well approximated by the Romatschke-Strickland form \cite{Romatschke:2003ms}. The use of this form is, however, not satisfactory in the cases where the transverse expansion is included in addition to the longitudinal Bjorken flow. If the effects connected with shear viscosity are taken into account, the presence of the transverse flow induces differences between the two components of pressure in the transverse plane. The Romatschke-Strickland form allows for the difference between the longitudinal and transverse pressures but the two transverse pressures must be identical.

An essential new feature of the present work is the generalization of the Romatschke-Strickland form to the expression which allows for three different components of pressure
\begin{eqnarray}
f(x,p) = k \exp\left(-\frac{1}{\Lambda}\sqrt{ \left( 1 + \zeta_X \right) \left( p\cdot X \right)^2 + \left( 1 + \zeta_Y \right) \left( p\cdot Y \right)^2 + \left( 1 + \zeta_Z \right) \left( p\cdot Z \right)^2 }\right).
\label{fzeta}
\end{eqnarray}
Here $k$ is an overall normalization constant, $\Lambda$ is the typical momentum scale, and $\zeta_I$'s $(I=X,Y,Z)$ are three anisotropy parameters. In the special case where $\zeta_X=\zeta_Y=0$, Eq.~(\ref{fzeta}) is reduced to the Romatschke-Strickland form \cite{Romatschke:2003ms}. In more general cases, the function (\ref{fzeta}) depends on the three different ratios $(1+\zeta_I)/\Lambda^2$. Exactly this feature allows us to introduce three different components of pressure in the local rest frame.

Introducing the new variables, namely,
\begin{eqnarray}
\lambda = \frac{ \Lambda }{ \sqrt{ 1 +
\frac{ 1 }{ 3 } (\zeta_X + \zeta_Y + \zeta_Z)} }, \quad \xi_I = \frac{1 +\zeta_I}{ 1 + \frac{1}{ 3 } ( \zeta_X + \zeta_Y + \zeta_Z )} - 1
\quad (I=X,Y,Z),
\end{eqnarray}
the distribution function (\ref{fzeta}) may be rewritten in the equivalent form as
\begin{eqnarray}
f(x,p) &=& k \exp\left(-\frac{1}{\lambda}\sqrt{ \left( 1 + \xi_X \right) \left( p\cdot X \right)^2 + \left( 1 + \xi_Y \right) \left( p\cdot Y \right)^2 + \left( 1 + \xi_Z \right) \left( p\cdot Z \right)^2 }\,\,\right) \nonumber \\
&=& k \exp\left(-\frac{1}{\lambda}\sqrt{
\left( p\cdot U\right)^2 +
\xi_X \left( p\cdot X \right)^2 +
\xi_Y \left( p\cdot Y \right)^2 +
\xi_Z \left( p\cdot Z \right)^2 }\,\,\right),
\label{fxi}
\end{eqnarray}
where the new anisotropy parameters $\xi_I$ satisfy the condition
\begin{eqnarray}
\sum_I \xi_I = \xi_X + \xi_Y + \xi_Z = 0.
\label{sumofxis}
\end{eqnarray}
To replace the first line in Eq.~(\ref{fxi}) by the second line we used Eq.~(\ref{Delta}) and the mass-shell condition $p^2=m^2=0$. The physical constraints $\Lambda > 0$ and $1+\zeta_I > 0$ imply that $\lambda > 0$ and $1+\xi_I > 0$. Hence, the initial parametrization (\ref{fzeta}) is completely equivalent to the new one. Below we shall use the expression (\ref{fxi}) and treat the scale $\lambda$ together with the two anisotropy parameters $\xi_X$ and $\xi_Y$ as three independent variables~\footnote{We note that the covariant form of the distribution function depends also on the transverse fluid rapidity $\theta_\perp$ through the vectors $U^\mu$ and $X^\mu$. Hence, we have in fact four independent scalar functions in (\ref{fxi}).}. Equation (\ref{sumofxis}) defines the applicability range of our parameterization
\begin{eqnarray}
-1 < \xi_X, \quad -1 < \xi_Y, \quad \xi_X+\xi_Y < 1.
\label{range}
\end{eqnarray}
The equilibrium function in (\ref{RTA}) has the form
\begin{eqnarray}
f_{\rm eq}(x,p) = k \exp\left( - \frac{p \cdot U}{T}
\right).
\label{feq}
\end{eqnarray}
One can show that the distribution function (\ref{fxi}) is reduced to the form (\ref{feq}) with $\lambda=T$, if the anisotropy parameters $\xi_I$ are all set equal to zero.

\section{Zeroth moment and particle number density}
\label{sect:0mom}

In this Section we present the zeroth moment of the Boltzmann equation. The zeroth and the first moments of the Boltzmann equation were used in Refs.~\cite{Martinez:2010sc,Martinez:2010sd,
Martinez:2012tu} to derive equations of anisotropic hydrodynamics in the direct relation to kinetic theory. This approach is suitable for the analysis of one-dimensional boost-invariant flow, since the first two moments yield three equations for three unknown functions (in this work these functions have been introduced as $\Lambda$, $T$, and $\zeta_Z$). If the transverse flow is included, one has to take into consideration one or more equations from the second moment of the Boltzmann equation. Below, we shall argue that in the boost-invariant and cylindrically symmetric case (with non-zero radial flow) it is preferable to consider two equations from the second moment rather than one equation from the zeroth moment together with an extra equation obtained from the second moment. Consequently, the formulas introduced in this Section will serve only as the reference point.

Having in mind the comments stated above, we introduce the zeroth moment of the kinetic equation (\ref{RTA})
\begin{eqnarray}
\int\!\! dP \; p\cdot\partial f = \frac{1}{\tau_{\rm eq}} \int\!\! dP \, p\cdot U
\left( f_{\rm eq} -f \right).
\label{zm1}
\end{eqnarray}
Here $dP=d^3{\bf p}/p$ is the Lorentz invariant integration measure (for massless particles considered in this work \mbox{$p = \sqrt{p_x^2+p_y^2+p_z^2}$}). Using the standard definition of the particle number current we find
\begin{eqnarray}
N^\mu = \int dP\, p^\mu f = n \,U^\mu, \quad N^\mu_{\rm eq} = \int dP\, p^\mu f_{\rm eq}
= n_{\rm eq}\, U^\mu,
\end{eqnarray}
and
\begin{eqnarray}
D n + n \theta = \frac{1}{\tau_{\rm eq}} \left(
n_{\rm eq} - n \right),
\label{zm2}
\end{eqnarray}
where $\theta$ is the expansion parameter defined in (\ref{volexpp}). We note that there are no terms proportional to the four-vectors $X^\mu$, $Y^\mu$ or $Z^\mu$ in the expansion of the current $N^\mu$. This is due to the quadratic dependence of the distribution function (\ref{fxi}) on these four-vectors. Dividing (\ref{zm2}) by $n$ we may further rewrite the zeroth moment equation as
\begin{eqnarray}
D \ln n +
\theta = \frac{1}{\tau_{\rm eq}}
\left( \frac{n_{\rm eq}}{n}-1\right).
\label{zm3}
\end{eqnarray}

The particle number density $n$ calculated for the anisotropic distribution function (\ref{fxi}) equals
\begin{eqnarray}
n(\lambda,\xi) =
\frac{8\pi k \lambda^3}{\sqrt{1+\xi_X} \sqrt{1+\xi_Y}\sqrt{1+\xi_Z}}.
\label{n}
\end{eqnarray}
On the left-hand side of (\ref{n}) we use the short-hand notation, $\xi$, to denote three anisotropy parameters $\xi_X$, $\xi_Y$, and $\xi_Z=-\xi_X-\xi_Y$. In equilibrium, the expression for the particle number density simplifies to
\begin{eqnarray}
n_{\rm eq}(T) = 8\pi k T^3.
\label{neq}
\end{eqnarray}

\section{First moment of kinetic equation}
\label{sect:1mom}

\subsection{Energy-momentum conservation}
\label{sect:enmomcon}

The first moment of the kinetic equation (\ref{RTA}) reads
\begin{eqnarray}
\int\!\! dP \; p^\nu p\cdot\partial f = \frac{1}{\tau_{\rm eq}} \int\!\! dP \,p^\nu \, p\cdot U
\left( f_{\rm eq} -f \right).
\label{fm1}
\end{eqnarray}
With the energy-momentum tensors defined by the second moments of the distribution functions,
\begin{eqnarray}
T^{\mu\nu} = \int dP p^\mu p^\nu f, \quad T^{\mu\nu}_{\rm eq} = \int dP p^\mu p^\nu f_{\rm eq},
\label{Tmunus}
\end{eqnarray}
we may rewrite Eq.~(\ref{fm1}) as
\begin{eqnarray}
\partial_\mu T^{\mu\nu} = \frac{1}{\tau_{\rm eq}} \left( U_\mu T^{\mu\nu}_{\rm eq} - U_\mu T^{\mu\nu} \right).
\label{fm2}
\end{eqnarray}
Since we want to conserve energy and momentum in the system, the left-hand side of Eq.~(\ref{fm2}) must vanish
\begin{eqnarray}
\partial_\mu T^{\mu\nu} = 0.
\label{enmomcon}
\end{eqnarray}
This leads us to the conclusion that the first-moment equations (\ref{fm1}) and (\ref{fm2}) are satisfied only if the Landau matching condition is satisfied
\begin{eqnarray}
U_\mu T^{\mu\nu}_{\rm eq} = U_\mu T^{\mu\nu}.
\label{LM1}
\end{eqnarray}

The form of the distribution function (\ref{fxi}) implies that the energy-momentum tensor of the anisotropic system has the structure
\begin{eqnarray}
T^{\mu \nu} = \varepsilon\, U^\mu U^\nu + P_X X^\mu X^\nu + P_Y Y^\mu Y^\nu + P_Z Z^\mu Z^\nu
= \varepsilon\, U^\mu U^\nu + \sum_I P_I I^\mu I^\nu,
\label{Tmunu}
\end{eqnarray}
where $\varepsilon$ is the energy density, while $P_X, P_Y$ and $P_Z$ are three different pressure components. In the local rest frame the energy-momentum tensor has the diagonal structure,
\begin{equation}
T^{\mu \nu} = \left(
\begin{array}{cccc}
\varepsilon & 0 & 0 & 0 \\
0 & P_X & 0 & 0 \\
0 & 0 & P_Y & 0 \\
0 & 0 & 0 & P_Z
\end{array} \right).
\label{Tmunuarray}
\end{equation}
In local equilibrium, $\varepsilon = \varepsilon_{\rm eq}$ and the three pressures become equal, $P_X = P_Y = P_Z = P_{\rm eq} = \varepsilon/3 $. Hence, the equilibrium energy-momentum tensor has the expected form
\begin{eqnarray}
T^{\mu \nu}_{\rm eq} &=&
\varepsilon_{\rm eq} U^\mu U^\nu + P_{\rm eq} X^\mu X^\nu + P_{\rm eq} Y^\mu Y^\nu + P_{\rm eq} Z^\mu Z^\nu, \nonumber \\
&=& \varepsilon_{\rm eq} U^\mu U^\nu - P_{\rm eq}
\Delta^{\mu\nu} = \left(\varepsilon_{\rm eq} +
P_{\rm eq} \right) U^\mu U^\nu - P_{\rm eq} g^{\mu\nu}.
\label{Tmunueq}
\end{eqnarray}
The use of the expressions (\ref{Tmunu}) and (\ref{Tmunueq}) in the Landau matching condition (\ref{LM1}) leads directly to the two equations
\begin{eqnarray}\label{p_e_d}
\varepsilon U^\mu = \varepsilon_{\rm eq} U^\mu, \quad \quad \varepsilon = \varepsilon_{\rm eq}.
\end{eqnarray}
We thus see that the Landau matching condition implies simply that the energy density of the system should be equal to the energy density of the thermal background. This requirement allows us to determine the effective temperature $T$ appearing in the thermal distribution $f_{\rm eq}$.

For boost-invariant and cylindrically symmetric systems only two out of four equations appearing in the conservation laws (\ref{enmomcon}) are independent. They are the same as those derived in Ref.~\cite{Florkowski:2011jg} and may be written in the compact form as
\begin{eqnarray}
D \varepsilon + \varepsilon \, \theta - \sum_I P_I \theta_I = 0
\label{enmom1}
\end{eqnarray}
and
\begin{eqnarray}
&& \left( X\cdot\partial \right) P_X + P_X \left( \partial\cdot X \right) - \varepsilon \left( X\cdot DU \right) - P_Y \left[ X\cdot\left( Y\cdot\partial \right)Y \right] - P_Z \left[ X\cdot\left( Z\cdot\partial \right)Z \right] = 0. \label{enmom2}
\end{eqnarray}
See Sec.~\ref{sect:explicitr} for the explicit formulas of the derivatives appearing in (\ref{enmom1}) and (\ref{enmom2}).

The standard dissipative hydrodynamics is based on the gradient expansion around the isotropic background. In this case, one usually considers small deviations from the equilibrium values. From this point of view it is interesting and useful to consider the close-to-equilibrium limit of our framework. Therefore, we introduce deviations from the equilibrium pressure, $\pi_I$'s, defined by the relations
\begin{equation}\label{pi_I}
P_X = P_{\rm eq} + \pi_X,
\qquad P_Y = P_{\rm eq} + \pi_Y,
\qquad P_Z = P_{\rm eq} + \pi_Z.
\end{equation}
The sum of the pressure deviations is equal to zero
\begin{eqnarray}
\sum_I \pi_I = \pi_X + \pi_Y + \pi_Z = 0.
\label{sumpiI}
\end{eqnarray}
The equilibrium pressure $P_{\rm eq}$ is one third of the energy density, $P_{\rm eq}=\varepsilon/3$. Changing from $P_I$'s to $\pi_I$'s we rewrite Eq.~(\ref{enmom1}) in the equivalent forms
\begin{equation}\label{alenmom2}
D\varepsilon + \frac{4}{3}\,\varepsilon\,\theta - \sum_I \pi_I\theta_I = 0, \qquad D\ln\varepsilon = -\frac{4}{3}\theta + \sum_I \frac{\pi_I}{\varepsilon}\,\theta_I.
\end{equation}

\subsection{Close-to-equilibrium behavior}
\label{sect:closetoeq}

In order to find the pressure deviations $\pi_I$'s in the close-to-equilibrium limit, we expand the anisotropic distribution function around the thermal background,
\begin{equation}
f \simeq f_{\rm eq} \left( 1 + \frac{\lambda - T}{ T^2 } (p\cdot U) - \frac{ \xi_X (p\cdot X)^2 + \xi_Y (p\cdot Y)^2 + \xi_Z (p\cdot Z)^2 }{2T (p\cdot U)} \right).
\end{equation}
Here, we neglect higher order contributions in $\xi_I$'s and in the difference $\lambda-T$. Then, the energy-momentum tensor reads
\begin{eqnarray}
T^{\mu\nu} &\simeq& T^{\mu\nu}_{\rm eq} + 96\, \pi\,k\,T^3\,U^\mu U^\nu \left( \frac{}{} \lambda - T \right)- 32\,\pi\,k\,T^3\,\Delta^{\mu\nu}\left( \frac{}{} \lambda - T \right)
\nonumber \\
&& -\frac{32\, \pi}{5}k\, T^4 \left( \frac{}{} \xi_X X^\mu X^\nu + \xi_Y Y^\mu Y^\nu + \xi_Z Z^\mu Z^\nu \right).
\end{eqnarray}
Using the Landau matching~(\ref{p_e_d}) we find that $\lambda=T$ in the leading order. Hence, the energy-momentum tensor in the leading order reads
\begin{equation}
T^{\mu\nu} \simeq T^{\mu\nu}_{\rm eq} -\frac{32 \pi}{5}k\, T^4 \left( \frac{}{} \xi_X X^\mu X^\nu + \xi_Y Y^\mu Y^\nu + \xi_Z Z^\mu Z^\nu \right).
\end{equation}
This expression helps us to identify directly the pressure corrections:
\begin{equation}\label{shear-eq1}
\pi_X \simeq - \frac{ 32 \pi k T^4}{5}\xi_X,
\qquad \pi_Y \simeq - \frac{32 \pi k T^4}{5}\xi_Y,
\qquad
\pi_Z \simeq - \frac{32 \pi k T^4}{5}\xi_Z.
\end{equation}
It is interesting to observe that the pressure corrections are directly proportional to the anisotropy parameters.

\section{Energy density and anisotropic pressure}
\label{sect:enedenaniso}

The energy density for the anisotropic distribution (\ref{fxi}) may be obtained from the contraction of the energy-momentum tensor with the four-vector $U$, namely, $\varepsilon = U_\mu U_\nu T^{\mu\nu}$. This gives
\begin{eqnarray}
\varepsilon(\lambda,\xi) = 24 \pi k \lambda^4 {\cal R}(\xi),
\label{eps1}
\end{eqnarray}
where the function ${\cal R}(\xi)$ is defined by the integral (for details see Sec.~\ref{sect:R})
\begin{equation}\label{R}
{\cal R}(\xi) = \frac{1}{4\pi \sqrt{ \prod_J(1 + \xi_J) } }\int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} }.
\end{equation}
In the case of local equilibrium we have
\begin{eqnarray}
\varepsilon_{\rm eq}(T) = 24 \pi k T^4.
\label{epseq1}
\end{eqnarray}
This leads us to the equivalent formulation of the Landau matching condition (\ref{LM1}) in the form
\begin{eqnarray}
T^4 = \lambda^4 {\cal R}(\xi),
\label{LM2}
\end{eqnarray}
which resembles the condition derived first in \cite{Martinez:2010sc}. It is important to notice, however, that our definitions of the parameters $\lambda$ and $\xi$ are different from the definitions of the parameters $\Lambda$ and $\xi$ used in \cite{Martinez:2010sc}~\footnote{The use of a single parameter $\xi$ in \cite{Martinez:2010sc} corresponds to the use of the non-zero parameter $\zeta_Z$ in our approach with the constraint $\zeta_X=\zeta_Y=0$. Our triplet of $\xi_I$'s satisfies the condition (\ref{sumofxis}). By the way, one can use the condition (\ref{sumofxis}) to check that the expansion of the function ${\cal R}(\xi)-1$ around zero has only quadratic terms in $\xi_I$'s. This is in agreement with the leading order result $\lambda=T$.}.

Similarly to the energy density, the three components of anisotropic pressure may be obtained from three contractions of the energy-momentum tensor with the four-vectors $X$, $Y$, and $Z$, namely, $P_X = X_\mu X_\nu T^{\mu\nu}$, $P_Y = Y_\mu Y_\nu T^{\mu\nu}$, and $P_Z = Z_\mu Z_\nu T^{\mu\nu}$. This leads to the formula
\begin{eqnarray}
P_I(\lambda,\xi) = 24 \pi k \lambda^4 {\cal H}_I (\xi),
\label{P_I}
\end{eqnarray}
where the functions ${\cal H}_I$ may be obtained by differentiation of the function ${\cal R}$ (the definitions of the functions ${\cal H}_I$ as integrals, which lead directly to (\ref{H_I}), are also given in Sec.~\ref{sect:R})
\begin{eqnarray}\nonumber
{\cal H}_I &=& -\frac{ 2\left( 1+ \xi_I \right) }{\sqrt{\prod_J (1 + \xi_J)}} \partial_{\xi_I}\left[ \sqrt{\prod_J (1 + \xi_J)}\;{\cal R} \right] \\
&=& -2\left( 1+ \xi_I \right){\cal R} \, \partial_{\xi_I}\left\{ \ln \left[ \sqrt{\prod_J (1 + \xi_J)} \; {\cal R} \right] \right\}
\label{H_I} \\ \nonumber
&=& -{\cal R} -2{\cal R}\left( 1 + \xi_I \right) \partial_{\xi_I} \left[ \ln\left( \frac{}{} {\cal R} \right) \right].
\end{eqnarray}
Since we consider a system of massless particles, $\varepsilon = P_X +P_Y + P_Z$ and the functions ${\cal H}_I$ satisfy the constraint
\begin{equation}
\sum_I {\cal H}_I = {\cal R}.
\label{sumH_I}
\end{equation}

\section{Second moment of kinetic equation}
\label{sect:2mom}

The second moment of the Boltzmann equation may be written in the form analogous to Eq.~(\ref{fm2}),
\begin{eqnarray}
\partial_\lambda \Theta^{\lambda\mu\nu} = \frac{1}{\tau_{\rm eq}} \left(U_\lambda\Theta_{\rm eq}^{\lambda\mu\nu} - U_\lambda\Theta^{\lambda\mu\nu}\right),
\label{tmom}
\end{eqnarray}
where
\begin{eqnarray}
\Theta^{\lambda\mu\nu} = \int\!\! dP \; p^\lambda p^\mu p^\nu f, \quad
\Theta^{\lambda\mu\nu}_{\rm eq} = \int\!\! dP \; p^\lambda p^\mu p^\nu f_{\rm eq}.
\label{Thetas}
\end{eqnarray}
The only non-vanishing terms in (\ref{Thetas}) are those with an even number of each spatial index. In the covariant form they read
\begin{eqnarray}
\Theta &=& \Theta_U \left[ U\otimes U \otimes U\right]
\nonumber \\
&& \,+\, \Theta_X \left[ U\otimes X \otimes X +X\otimes U \otimes X + X\otimes X \otimes U\right]
\nonumber \\
&& \,+\, \Theta_Y \left[ U\otimes Y \otimes Y +Y\otimes U \otimes Y + Y\otimes Y \otimes U\right]
\nonumber \\
&& \,+\, \Theta_Z \left[ U\otimes Z \otimes Z +Z\otimes U \otimes Z + Z\otimes Z \otimes U\right].
\label{Theta}
\end{eqnarray}
Due to the mass-shell condition $p^2 = m^2 = 0$, the coefficients in the expansion (\ref{Theta}) are not independent. One may check that
\begin{eqnarray}
\Theta_X + \Theta_Y + \Theta_Z = \Theta_U.
\label{ThetaU}
\end{eqnarray}
This and other tensor identities may be most easily checked in the local rest frame. A similar argument holds for the projections of $\Theta_{\rm eq}^{\lambda \mu\nu}$. In addition, due to the rotation invariance of the equilibrium distribution we have
\begin{eqnarray}
\Theta^{\rm eq}_X = \Theta^{\rm eq}_Y = \Theta^{\rm eq}_Z = \Theta_{\rm eq}\,.
\label{ThetaUeq}
\end{eqnarray}
Out of the ten independent equations in (\ref{tmom}) five are trivial $0=0$ equations. They correspond to the contractions of (\ref{tmom}) with $U\otimes Y$, $U\otimes Z$, $X\otimes Y$, $X\otimes Z$, and $Y\otimes Z$. The contraction with $U\otimes U$ may be represented as a linear combination of the contractions with $X\otimes X$,
$Y\otimes Y$, and $Z\otimes Z$. As a consequence, we deal with four independent contractions, namely, with $U\otimes X$, $X\otimes X$, $Y\otimes Y$, and $Z\otimes Z$. The contraction of (\ref{tmom}) with $U\otimes X$ gives
\begin{eqnarray}
D \left(\Theta_U + 2 \Theta_X \right) +
\left( X\cdot\partial \right) \Theta_X =
\frac{\sinh\theta_\perp}{\tau} \left( \Theta_Z - \Theta_X \right) +
\frac{\cosh\theta_\perp}{r} \left( \Theta_Y - \Theta_X \right),
\label{tmomUX}
\end{eqnarray}
with $X\otimes X$
\begin{eqnarray}
D \Theta_X + \Theta_X \left( \theta - 2\theta_X \right) = \frac{1}{\tau_{\rm eq}} \left(\Theta_{\rm eq} - \Theta_X \right),
\label{tmomXX}
\end{eqnarray}
with $Y\otimes Y$
\begin{eqnarray}
D \Theta_Y + \Theta_Y \left( \theta - 2\theta_Y \right) = \frac{1}{\tau_{\rm eq}} \left(\Theta_{\rm eq} - \Theta_Y \right),
\label{tmomYY}
\end{eqnarray}
and, finally, with $Z\otimes Z$
\begin{eqnarray}
D \Theta_Z + \Theta_Z \left( \theta - 2\theta_Z \right) = \frac{1}{\tau_{\rm eq}} \left(\Theta_{\rm eq} - \Theta_Z \right).
\label{tmomZZ}
\end{eqnarray}

\section{Selection of equations of motion - matching with Israel-Stewart theory}
\label{sect:set}

In the considered model we have five independent parameters (more precisely, five scalar functions of the proper time, $\tau$, and the transverse distance, $r$). These are: the momentum scale, $\lambda$, the effective temperature, $T$, the transverse rapidity, $\theta_\perp$, and two independent anisotropy parameters, for example, $\xi_X$ and $\xi_Y$. The two nontrivial equations from the first moment of the Boltzmann equation, Eqs.~(\ref{enmom1}) and (\ref{enmom2}), as well as the Landau matching condition~(\ref{LM2}) ensure local energy and momentum conservation. Therefore, these three equations should be definitely included in the computational scheme of anisotropic hydrodynamics.

The problem arises which equations should be taken into account in addition to the first-moment equations. We need two extra equations and they should be selected out of Eqs.~(\ref{zm3}), (\ref{tmomUX}), (\ref{tmomXX}), (\ref{tmomYY}), and (\ref{tmomZZ}). An important requirement for our approach is that it must agree with the Israel-Stewart approach in the close-to-equilibrium limit. In this case, the pressure corrections satisfy the three symmetric equations~\footnote{Due to the conditions $\sum_I \pi_I =0$ and $\sum_I \sigma_I =0$ only two out of three equations in (\ref{2_ord_visc}) are independent.}
\begin{equation}
\tau_\pi D\pi_I + \pi_I = 2 \eta \sigma_I + F_\eta \,\pi_I,
\label{2_ord_visc}
\end{equation}
where \cite{Muronga:2003ta}
\begin{eqnarray}
F_\eta &=& - \eta T \partial \cdot \left( \frac{\alpha_1}{2 T} U \right) .
\label{F}
\end{eqnarray}
In Eqs.~(\ref{2_ord_visc}) and (\ref{F}) the quantity $\tau_\pi$ is the relaxation time for the shear viscous corrections $\pi_I$, $\eta$ is the shear viscosity, and $\alpha_1$ is one of the kinetic coefficients appearing in second order hydrodynamics~\cite{Israel:1979wp}. The symmetric form of the three equations appearing in (\ref{2_ord_visc}) suggests that one should use Eqs.~(\ref{tmomXX}), (\ref{tmomYY}), and (\ref{tmomZZ}) as a starting point for possible generalizations of (\ref{2_ord_visc}) to the case of high pressure anisotropy. The use of the zeroth moment equation combined with one of the equations obtained from the second moment leads to asymmetric treatment of different anisotropies, which contradicts the symmetric form of Eqs.~(\ref{2_ord_visc}).

In the remaining part of this Section we show that two linear combinations of Eqs.~(\ref{tmomXX})--(\ref{tmomZZ}) provide indeed a system of equations which agree with Eqs.~(\ref{2_ord_visc}) in the close-to-equilibrium limit. At first, it is useful to take advantage of the fact that $\Theta_I$'s are positive,
\begin{equation}
\Theta_I = \int\!\! dP \, p \, (p\cdot I)^2 f = \frac{ 32 \, \pi \, k \, \lambda^5 }{ \sqrt{ \prod_J(1+\xi_J) } } \frac{1}{1+\xi_I},
\label{Theta_I}
\end{equation}
\begin{equation}
\Theta_{\rm eq} = \int\!\! dP \, p \, (p\cdot I)^2 f_{\rm eq} = { 32 \, \pi \, k \, T^5 } \qquad (I=X,Y,Z).
\label{Theta_eq}
\end{equation}
Then, we rewrite Eqs.~(\ref{tmomXX})--(\ref{tmomZZ}) dividing each of them first by $\Theta_I$. In this way we obtain
\begin{eqnarray}
D \ln\Theta_I + \theta - 2\theta_I = \frac{1}{\tau_{\rm eq}} \left[ \frac{\Theta_{\rm eq}}{\Theta_I} - 1 \right].
\label{(I)/I}
\end{eqnarray}
In the next step, we define the two desired equations by taking Eqs.~(\ref{(I)/I}) for $I=X$ and $I=Y$, and by subtracting one third of the sum of Eqs.~(\ref{(I)/I}) from these two equations
\begin{eqnarray}
D \ln\Theta_I + \theta - 2\theta_I -\frac{1}{3}\sum_J \left[\frac{}{} D \ln\Theta_J + \theta - 2\theta_J\right] = \frac{1}{\tau_{\rm eq}} \left[ \frac{\Theta_{\rm eq}}{\Theta_I} - 1 \right] -\frac{1}{3}\sum_J \left\{ \frac{1}{\tau_{\rm eq}} \left[ \frac{\Theta_{\rm eq}}{\Theta_J} - 1 \right] \right\} \quad (I=X,Y).
\label{sum}
\end{eqnarray}
Of course, other choices of the two indices $I$ are also possible. The very important feature of our strategy is that fulfilling Eqs.~(\ref{sum}) for arbitrary two indices implies that the same equation is fulfilled for the remaining third index. To demonstrate this property, we first make use of Eqs.~(\ref{Theta_I}) and (\ref{Theta_eq}) to rewrite Eqs.~(\ref{sum}) in the simpler form as
\begin{equation}
\frac{D\xi_I}{1+\xi_I}
-\frac{1}{3}\sum_J\frac{D\xi_J}{1+\xi_J}
+ 2\sigma_I + \frac{\xi_I}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } = 0 \quad (I=X,Y).
\label{explicit-sum2}
\end{equation}
If Eq.~(\ref{explicit-sum2}) is fulfilled for $I=X$ and $I=Y$, the same equation holds for $I=Z$. Indeed, if we use the properties $\sigma_Z = -\sigma_X - \sigma_Y$ and $\xi_Z = -\xi_X - \xi_Y$, then the straightforward calculation shows
\begin{eqnarray}\nonumber
&& \frac{D\xi_Z}{1+\xi_Z}
-\frac{1}{3}\sum_J\frac{D\xi_J}{1+\xi_J}
+ 2\sigma_Z + \frac{\xi_Z}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } \\\nonumber
&& = \frac{D\xi_Z}{1+\xi_Z}
-\frac{1}{3}\sum_J\frac{D\xi_J}{1+\xi_J}
+ \left[ -2 \sigma_X -\frac{\xi_X}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } \right] + \left[ -2 \sigma_Y -\frac{\xi_Y}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } \right] \\
&& = \frac{D\xi_Z}{1+\xi_Z} + \frac{D\xi_X}{1+\xi_X} + \frac{D\xi_Y}{1+\xi_Y} - \sum_J\frac{D\xi_J}{1+\xi_J} \equiv 0.
\end{eqnarray}

Close to equilibrium, the anisotropy parameters $\xi_I$ are proportional to the pressure corrections $\pi_I$. Using Eqs.~(\ref{p_e_d}), (\ref{shear-eq1}), and (\ref{epseq1}), we find
\begin{equation}\label{shear-eq2}
\xi_X \simeq -\frac{15}{4} \frac{ \pi_X}{ \varepsilon }, \qquad \xi_Y \simeq -\frac{15}{4} \frac{ \pi_Y}{ \varepsilon }, \qquad \xi_Z \simeq -\frac{15}{4} \frac{ \pi_Z}{ \varepsilon }.
\end{equation}
Since $\xi_X + \xi_Y + \xi_Z =0$ and $\lambda=T$ up to quadratic terms in the anisotropy parameters, Eq.~(\ref{explicit-sum2}) reads
\begin{equation}\label{IS-equivalent0}
D\xi_I + 2\sigma_I + \frac{\xi_I}{\tau_{\rm eq}} \simeq 0.
\end{equation}
Using Eqs.~(\ref{shear-eq2}) and multiplying the last equation by $-4\varepsilon/15$ we obtain
\begin{equation}\label{IS-equivalent}
D\pi_I -\pi_I D\ln\varepsilon - 2 \left( \frac{4}{15}\, \varepsilon \right)\sigma_I + \frac{\pi_I}{\tau_{\rm eq}} \simeq 0,
\end{equation}
or
\begin{equation}\label{IS-equivalent2}
\tau_{\rm eq} D\pi_I + \pi_I = 2 \left( \frac{4}{15}\, \varepsilon \, \tau_{\rm eq}\right) \sigma_I +\tau_{\rm eq}\, \pi_I \, D\ln\varepsilon.
\end{equation}
The two terms on the left-hand side of (\ref{IS-equivalent2}) and the first term on the right-hand side of (\ref{IS-equivalent2}) agree with the corresponding terms in Eq.~(\ref{2_ord_visc}) provided we connect the relaxation time $\tau_\pi$ and the shear viscosity $\eta$ with the relaxation time $\tau_{\rm eq}$ and the energy density $\varepsilon$ by the expressions
\begin{equation}\label{transport_coefficients}
\tau_\pi = \tau_{\rm eq}, \qquad \eta = \frac{4}{15} \, \varepsilon \, \tau_{\rm eq}.
\end{equation}
Finally, one may find the value of the coefficient $\alpha_1$ by comparing the last term in Eq.~(\ref{IS-equivalent2}) with the expression for $F_\eta \pi_I$. Using (\ref{F}), one finds

\begin{eqnarray}
\pi_I F_\eta &=&
- \pi_I \eta T \, \partial \cdot \left( \frac{\alpha_1}{2 T} U \right) = \pi_I \left[ -\partial\cdot\left(\frac{}{}\eta\alpha_1\, U\right) + \eta\alpha_1D\ln\left(\frac{}{}\eta T\right) \right] = \\ \nonumber
&=& \pi_I \left[ -\left( \eta\alpha_1 \right)\theta - D\left( \eta\alpha_1 \right) +\left(\eta\alpha_1\right)D\ln\left(\frac{}{}\eta T\right) \right].
\end{eqnarray}
With the relaxation time $\tau_\pi$ and the viscosity $\eta$ identified through Eq.~(\ref{transport_coefficients}) and also using Eqs.~(\ref{p_e_d}), (\ref{alenmom2}), and (\ref{epseq1}) we obtain
\begin{eqnarray}\nonumber
\pi_I F_\eta &=&
\pi_I \left[ \frac{3}{4}\left(\eta\alpha_1\right)D\ln\varepsilon -\frac{3}{4}\left(\eta\alpha_1\right) \sum_J \frac{\pi_J}{\varepsilon}\theta_J - D\left(\eta\alpha_1\right) +\frac{5}{4}\left(\eta\alpha_1\right)D\ln\varepsilon + \left(\eta\alpha_1\right) D\ln\tau_\pi \right] = \\
&=& \pi_I \left[ 2\left(\eta\alpha_1\right) D\ln\varepsilon +\left(\eta\alpha_1\right)D\ln\tau_\pi -D\left(\eta\alpha_1\right) \right].
\end{eqnarray}
Since the last term is quadratic in the pressure corrections, it can be neglected, while the remaining part equals the last term in Eq.~(\ref{IS-equivalent2}) when

\begin{equation}
\eta\alpha_1 = \frac{1}{2} \tau_\pi \qquad \hbox{or} \qquad \alpha_1=\frac{\tau_\pi}{2\eta},
\end{equation}
which is consistent with the Israel-Stewart theory~\cite{Muronga:2001zk}.

\section{Entropy source}
\label{sect:ent}

An important verification of our scheme based on Eqs.~(\ref{enmom1}), (\ref{enmom2}), (\ref{LM2}), and (\ref{explicit-sum2}) is to check whether it leads to a positive-definite entropy source. Using the Boltzmann definition of the entropy current, we find that it is proportional to the particle density
\begin{eqnarray}
\sigma^\mu = \sigma U^\mu = 4 n U^\mu,
\label{entcur}
\end{eqnarray}
where $n$ is defined by Eq.~(\ref{n}), hence
\begin{eqnarray}
\sigma(\lambda,\xi) =
\frac{32\pi k \lambda^3}{\sqrt{1+\xi_X} \sqrt{1+\xi_Y}\sqrt{1+\xi_Z}}.
\label{sigma}
\end{eqnarray}
Combining this equation with the formula for the energy density we find
\begin{equation}\label{eps2}
\varepsilon = 24\, \pi \, k \left( \frac{ \sigma }{32\, \pi \, k} \sqrt{ \prod_I \left( 1 +\xi_I \right) } \right)^{\frac{4}{3}} {\cal R} = \frac{3}{8} \left( 4\, \pi \, k \right)^{-\frac{1}{3}} \sigma^{\frac{4}{3}} \left[ \prod_J \left( 1 + \xi_J \right) \right]^{\frac{2}{3}} {\cal R}.
\end{equation}
Substituting this equation into Eq.~(\ref{enmom1}) gives
\begin{equation}
\frac{4}{3} \left( \frac{}{} D\ln\sigma + \theta \right) +\frac{2}{3}\sum_I \frac{ D \xi_I }{ 1 + \xi_I } + D\ln {\cal R} -\sum_I \frac{ \pi_I }{\varepsilon} \theta_I = 0.
\label{entr_sour_1}
\end{equation}
We note that the expression in the bracket on the left-hand side of Eq.~(\ref{entr_sour_1}) is proportional to the entropy source
\begin{eqnarray}
\Sigma = \partial_\mu \sigma^\mu =
\partial_\mu \left(\sigma U^\mu \right) = D \sigma + \sigma \theta.
\end{eqnarray}

We shall express now the last two terms in (\ref{entr_sour_1}) in terms of the functions ${\cal R}$ and ${\cal H}_I$. From Eq.~(\ref{H_I}) we calculate the $\xi_I$ derivative of $\ln({\cal R})$
\begin{equation}\label{derR}
\partial_{\xi_I} \left[ \ln\left( \frac{}{} {\cal R} \right) \right] = -\frac{1}{2\left( 1 +\xi_I \right)}\left[ 1 + \frac{ {\cal H}_I }{ {\cal R} } \right].
\end{equation}
Hence, the convective derivative $D\ln({\cal R})$ reads
\begin{equation}\label{DlnR}
D\ln\left( \frac{}{} {\cal R} \right) = \sum_I D\xi_I \, \partial_{\xi_I} \left[ \ln\left( \frac{}{} {\cal R} \right) \right] = -\frac{1}{2} \sum_I \left[ 1 +\frac{ {\cal H}_I }{ {\cal R} } \right] \frac{ D\xi_I }{ 1 +\xi_I }.
\end{equation}
On the other hand, using definitions of the pressure corrections~(\ref{pi_I}) and of the functions ${\cal H}_I$, we find a useful expression for the $\pi/\varepsilon$ ratio
\begin{equation}
\frac{\pi_I}{\varepsilon} = -\frac{1}{3}\left[ 1 -3 \frac{ {\cal H}_I }{ {\cal R} } \right]
\label{pi_I/eps}
\end{equation}
and
\begin{equation}
\sum_I \frac{\pi_I}{\varepsilon} \theta_I = -\frac{1}{3}\sum_I \left[ 1 -3 \frac{ {\cal H}_I }{ {\cal R} } \right] \left( \frac{1}{3}\theta + \theta_I -\frac{1}{3}\theta \right) = -\frac{1}{6} \sum_I \left[ 1 -3 \frac{ {\cal H}_I }{ {\cal R} } \right] 2\sigma_I.
\label{sumpi_I/eps}
\end{equation}
Here we replaced the components of the expansion tensor $\theta_I$ by the components of the shear tensor $\sigma_I$.

Using Eqs.~(\ref{derR})--(\ref{sumpi_I/eps}), which are exact and do not refer to the small anisotropy limit, we may write
\begin{equation}\label{entr_sour_2}
\frac{4}{3} \, \frac{ \partial_\mu \sigma^\mu }{ \sigma } +\frac{1}{6}\sum_I \left[ 1 - 3\frac{ {\cal H}_I }{ {\cal R} } \right] \frac{ D \xi_I }{ 1 + \xi_I } +\frac{1}{6}\sum_I \left[ 1 - 3\frac{ {\cal H}_I }{ {\cal R} } \right] 2 \sigma_I = 0,
\end{equation}
or, equivalently,

\begin{equation}\label{entr_sour_3}
\frac{ \partial_\mu \sigma^\mu }{ \sigma } = - \sum_I\left[ \frac{1}{8} - \frac{3}{8} \frac{ {\cal H}_I }{ {\cal R} } \right] \left( \frac{ D\xi_I }{ 1 + \xi_I } + 2 \sigma_I \right).
\end{equation}
Using now Eqs.~(\ref{LM2}), (\ref{sumH_I}), and (\ref{explicit-sum2}) we find
\begin{equation}\label{entr_sour_4}
\frac{ \partial_\mu \sigma^\mu }{ \sigma } = \frac{1}{\tau_{\rm eq}}{\cal R}^{\frac{5}{4}}\sqrt{\prod_J\left( 1 + \xi_J \right)} \, \sum_I\left[ \frac{1}{8} - \frac{3}{8} \frac{ {\cal H}_I }{ {\cal R} } \right] \xi_I = -\frac{3}{ 8 \tau_{\rm eq} }{\cal R}^{\frac{5}{4}}\sqrt{\prod_J\left( 1 + \xi_J \right)} \, \sum_I\frac{ {\cal H}_I }{ {\cal R} }\xi_I \ge 0.
\end{equation}
The last inequality has been checked numerically in the allowed range of the parameters $\xi_X$ and $\xi_Y$, see Eqs.~(\ref{sumofxis}) and (\ref{range}). For small anisotropies we use Eqs.~(\ref{shear-eq2}) and (\ref{pi_I/eps}) to find that
\begin{eqnarray}
\frac{ \partial_\mu \sigma^\mu }{ \sigma } = \frac{1}{10 \tau_{\rm eq}} \sum_I \xi_I^2,
\label{entsorceapp}
\end{eqnarray}
which is again consistent with the Israel-Stewart theory.

\section{Summary and conclusions}
\label{sect:con}

In this paper we have used the projection method for boost-invariant and cylindrically symmetric systems to introduce a new formulation of anisotropic hydrodynamics that allows for three different values of pressure acting in three different directions. Our considerations have been based on the Boltzmann kinetic equation with the collision term treated in the relaxation time approximation. The momentum anisotropy has been included explicitly in the leading term of the distribution function.

A novel feature of our work is the complete analysis of the second moment of the Boltzmann equation, in addition to the zeroth and first moments that have been analyzed in earlier studies. The framework of anisotropic hydrodynamics should include five equations for five unknown functions: $\lambda$, $T$, $\theta_\perp$, $\xi_X$ and $\xi_Y$. The first two equations follow from the energy and momentum conservation, Eqs.~(\ref{enmom1}) and (\ref{enmom2}). Their explicit, extended versions are
\begin{eqnarray}
&& \left( \cosh \theta_\perp \partial_\tau
+ \sinh \theta_\perp \partial_r \right)
\varepsilon(\lambda,\xi)
+ \varepsilon(\lambda,\xi) \left[ \cosh \theta_\perp \left( \frac{1}{\tau} + \partial_r \theta_\perp \right) + \sinh \theta_\perp \left(
\frac{1}{r} + \partial_\tau \theta_\perp \right) \right] \nonumber \\
&& + P_X(\lambda,\xi) \left( \cosh \theta_\perp \partial_r \theta_\perp + \sinh \theta_\perp \partial_\tau \theta_\perp \right) + P_Y(\lambda,\xi) \frac{\sinh \theta_\perp}{r} +P_Z(\lambda,\xi) \frac{\cosh \theta_\perp}{\tau} = 0,
\label{fineq1}
\end{eqnarray}
and
\begin{eqnarray}
&& \left( \sinh \theta_\perp \partial_\tau
+ \cosh \theta_\perp \partial_r \right) P_X(\lambda,\xi) + \varepsilon \left( \sinh \theta_\perp \partial_r \theta_\perp
+ \cosh \theta_\perp \partial_\tau \theta_\perp \right) \nonumber \\
&& + P_X(\lambda,\xi) \left[ \sinh \theta_\perp \left( \frac{1}{\tau} + \partial_r \theta_\perp \right) + \cosh \theta_\perp \left(
\frac{1}{r} + \partial_\tau \theta_\perp \right) \right] - P_Y(\lambda,\xi) \frac{\cosh \theta_\perp}{r} - P_Z(\lambda,\xi) \frac{\sinh \theta_\perp}{\tau} = 0. \label{fineq2}
\end{eqnarray}
The main result of the present work is that Eqs.~(\ref{fineq1}) and (\ref{fineq2}) should be supplemented with the two equations obtained from the second moment of the Boltzmann equation
\begin{eqnarray}
&& \frac{1}{1+\xi_I} \left( \cosh \theta_\perp \partial_\tau
+ \sinh \theta_\perp \partial_r \right) \xi_I
-\frac{1}{3}\sum_J\frac{1}{1+\xi_J}
\left( \cosh \theta_\perp \partial_\tau
+ \sinh \theta_\perp \partial_r \right) \xi_J
\nonumber \\
&& + 2\sigma_I + \frac{\xi_I}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } = 0 \qquad (I=X,Y).
\label{fineq34}
\end{eqnarray}
The effective temperature appearing in (\ref{fineq34}) should be obtained from the Landau matching condition which, for the sake of convenience, we also repeat here
\begin{eqnarray}
\left(\frac{T}{\lambda}\right)^4 = {\cal R}(\xi).
\label{fineq5}
\end{eqnarray}
The numerical analysis of Eqs.~(\ref{fineq1})--(\ref{fineq5}) is left for a separate study.

\bigskip

{\bf Acknowledgments}: L.T. and W.F. were supported in part by the Polish National Science Center grants with decisions No. DEC-2012/06/A/ST2/00390 and No.
DEC-2012/05/B/ST2/02528, respectively.

\section{Appendix: Explicit formulas for derivatives}
\label{sect:explicitr}

The total time (or convective) derivative, $D = U^\alpha \partial_\alpha = U \cdot \partial$, describes the change of a physical quantity in the local rest frame. In the remaining part of this Section we collect the formulas involving $D$ and other derivatives which are useful in dealing with hydrodynamic equations.

\medskip \noindent Directional derivatives:
\begin{eqnarray}
U \cdot \partial &=&
\cosh\theta_\perp \partial_\tau
+ \sinh\theta_\perp \partial_r,
\quad Y \cdot \partial = \frac{1}{r} \partial_\phi,
\nonumber \\
X \cdot \partial &=& \sinh\theta_\perp \partial_\tau
+ \cosh\theta_\perp \partial_r,
\quad Z \cdot \partial = \frac{1}{\tau} \partial_{\eta_\parallel}.
\label{useful-eqns-1}
\end{eqnarray}

\medskip \noindent Divergences:
\begin{eqnarray}
\partial \cdot U &=&
\cosh\theta_\perp \left(\frac{1}{\tau} + \partial_r \theta_\perp \right)
+ \sinh\theta_\perp \left( \frac{1}{r} + \partial_\tau \theta_\perp \right), \quad
\partial \cdot Y = 0,
\nonumber \\
\partial \cdot X &=&
\sinh\theta_\perp \left(\frac{1}{\tau} + \partial_r \theta_\perp \right)
+ \cosh\theta_\perp \left( \frac{1}{r} + \partial_\tau \theta_\perp \right), \quad
\partial \cdot Z = 0.
\label{useful-eqns-2}
\end{eqnarray}

\medskip \noindent Convective derivatives of $U$, $X$, $Y$, and $Z$:
\begin{eqnarray}
D U = (U \cdot \partial) U &=&
X \left(\cosh\theta_\perp \partial_\tau \theta_\perp + \sinh\theta_\perp \partial_r \theta_\perp \right), \quad D Y = (U \cdot \partial) Y = 0,
\nonumber \\
D X = (U \cdot \partial) X &=&
U \left(\cosh\theta_\perp \partial_\tau \theta_\perp + \sinh\theta_\perp \partial_r \theta_\perp \right), \quad D Z = (U \cdot \partial) Z = 0.
\label{useful-eqns-3}
\end{eqnarray}

\medskip \noindent Directional derivatives of $U$, $X$, $Y$, and $Z$:
\begin{eqnarray}
(X \cdot \partial) U &=&
X \left(\sinh\theta_\perp \partial_\tau \theta_\perp + \cosh\theta_\perp \partial_r \theta_\perp \right), \quad
(X \cdot \partial) Y = 0,
\nonumber \\
(X \cdot \partial) X &=&
U \left(\sinh\theta_\perp \partial_\tau \theta_\perp + \cosh\theta_\perp \partial_r \theta_\perp \right), \quad
(X \cdot \partial) Z = 0,
\label{useful-eqns-4}
\end{eqnarray}
\begin{eqnarray}
(Y \cdot \partial) U &=&
\frac{\sinh\theta_\perp}{r} \, Y, \quad
(Y \cdot \partial) Y = \frac{1}{r}\left( \frac{}{} \sinh\theta_\perp U - \cosh\theta_\perp X \right),
\nonumber \\
(Y \cdot \partial) X &=&
\frac{\cosh \theta_\perp}{r} \, Y, \quad
(Y \cdot \partial) Z =0,
\label{useful-eqns-5}
\end{eqnarray}
\begin{eqnarray}
(Z \cdot \partial) U &=&
\frac{\cosh\theta_\perp}{\tau} \, Z, \quad
(Z \cdot \partial) Y = 0,
\nonumber \\
(Z \cdot \partial) X &=&
\frac{ \sinh\theta_\perp}{\tau} \, Z , \quad
(Z \cdot \partial) Z = \frac{1}{\tau}\left( \frac{}{} \cosh\theta_\perp U - \sinh\theta_\perp X \right).
\label{useful-eqns-6}
\end{eqnarray}

\section{Appendix: Integrals for energy density and pressure}
\label{sect:R}

In order to pass from Eq.~(\ref{entr_sour_1}) to Eq.~(\ref{entr_sour_2}) we need several properties of the function ${\cal R}$ defined in~(\ref{eps1}). They follow most easily from the representation of ${\cal R}$ in the local rest frame,
\begin{eqnarray}\nonumber
\varepsilon &=& \int dP (p\cdot U)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} p^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \\
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } = 24 \pi k \lambda^4 {\cal R}.
\end{eqnarray}
For simplicity of notation we use here the symbol $p^I$ to denote the three-momentum component $p_i$.

\bigskip

Similar expressions may be found for the pressures $P_X$, $P_Y$ and $P_Z$
\begin{eqnarray}
P_X &=& \int dP \, (p\cdot X)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} \left( p^X \right)^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \nonumber \\ \label{H_X}
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \frac{ \cos^2\phi \sin ^2\theta }{\left( 1 + \xi_X \right) \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } } = 24\, \pi \, k \, \lambda^4\; {\cal H}_X.
\end{eqnarray}

\begin{eqnarray}
P_Y &=& \int dP \, (p\cdot Y)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} \left( p^Y \right)^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \\ \label{H_Y}
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \frac{ \sin^2\phi \sin ^2\theta }{\left( 1 + \xi_Y \right) \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } } = 24\, \pi \, k \, \lambda^4\; {\cal H}_Y .
\end{eqnarray}

\begin{eqnarray}
P_Z &=& \int dP \, (p\cdot Z)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} \left( p^Z \right)^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \\
\label{H_Z}
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \frac{ \cos^2\theta }{\left( 1 + \xi_Z \right) \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } } = 24\, \pi \, k \, \lambda^4\; {\cal H}_Z.
\end{eqnarray}
The equations above define the functions ${\cal H}_I$.

\bigskip

\end{document}

\begin{figure}[t]
\begin{center}
\includegraphics[angle=0,width=0.55\textwidth]{Sigma.pdf}
\end{center}
\caption{(Color online) The function $-\sqrt{\prod_J (1+\xi_J)} \sum_I {\cal H}_I \xi_I$ defining the entropy source in Eq.~(\ref{entr_sour_4}) for $\lambda=1$.}
\label{fig:Sigma}
\end{figure}
\title[Small ranks over function fields]{Jacobi sums, Fermat Jacobians,
\\and ranks of abelian varieties
\\over towers of function fields}

\section{Introduction}

\subsection{}
Given an abelian variety $A$ over a function field $K=k(\mathcal{C})$ with
$\mathcal{C}$ an absolutely irreducible, smooth, proper curve over a field
$k$, it is natural to ask about the behavior of the Mordell-Weil group of $A$ in the layers of a tower of fields over $K$. The simplest case, which is already very interesting, is when $A$ is an elliptic curve, $K=k(t)$ is a rational function field, and one considers the towers $k(t^{1/d})$ or $\overline k(t^{1/d})$ as $d$ varies through powers of a prime or through all integers not divisible by the characteristic of $k$.

When $k=\mathbb{Q}$ or more generally a number field, several authors (e.g.,
\cite{Shioda}, \cite{Stiller}, \cite{Fastenberg}, \cite{Silverman1},
\cite{Silverman2}, and \cite{Ellenberg}) have considered this question and given bounds on the rank of $A$ over $\mathbb{Q}(t^{1/d})$ or
${\overline{\mathbb{Q}}}(t^{1/d})$. In some interesting cases it can be shown that $A$
has rank bounded independently of $d$ in the tower ${\overline{\mathbb{Q}}}(t^{1/d})$.
Of course no example is yet known of an elliptic curve over $\mathbb{Q}(t)$
with unbounded ranks in the tower $\mathbb{Q}(t^{1/d})$, nor of an elliptic curve over ${\overline{\mathbb{Q}}}(t)$ with non-constant $j$-invariant and unbounded ranks in the tower ${\overline{\mathbb{Q}}}(t^{1/d})$.

When $k$ is a finite field, examples of Shioda and the author show that there are non-isotrivial elliptic curves over ${\mathbb{F}_p}(t)$ with unbounded ranks in the towers ${\overline{\mathbb{F}}_p}(t^{1/d})$ \cite{Shioda}*{Remark 10} and ${\mathbb{F}_p}(t^{1/d})$ \cite{UlmerR1}*{1.5}. More recently, the author has shown \cite{UlmerR2} that high ranks over function fields over finite fields are in some sense ubiquitous. For example, for every prime $p$ and every integer $g>0$ there are absolutely simple abelian varieties of dimension $g$ over ${\mathbb{F}_p}(t)$ with unbounded ranks in the tower ${\mathbb{F}_p}(t^{1/d})$, and given any non-isotrivial elliptic curve $E$ over ${\mathbb{F}_q}(t)$, there exists a finite extension ${\mathbb{F}}_{r}(u)$ such that $E$ has unbounded (analytic) ranks in the tower ${\mathbb{F}}_{r}(u^{1/d})$.

One obvious difference between number fields and finite fields which might be relevant here is the complexity of their absolute Galois groups: that of a finite field is pro-cyclic while that of a number field is highly non-abelian. Ellenberg uses this non-abelianess in a serious way in his work on bounding ranks and, in a private communication, he asked whether it might be the case that, say, a non-isotrivial elliptic curve over ${\mathbb{F}_q}(t)$ always has unbounded rank in the tower ${\overline{\mathbb{F}}_q}(t^{1/d})$.

Our goal in this note, which is a companion to \cite{UlmerR2}, is to give a number of examples of abelian varieties over function fields
${\mathbb{F}_q}(t)$ which have bounded ranks in the towers ${\overline{\mathbb{F}}_q}(t^{1/d})$ as
$d$ ranges through powers of a suitable prime or through all integers not divisible by $p$, the characteristic of ${\mathbb{F}_q}$. We also get some information about ranks in towers $k(t^{1/d})$ for arbitrary fields
$k$. Along the way we prove some new results on Fermat curves which may be of independent interest. The main results are Theorems~\ref{thm:GaussJacobiBounds}, \ref{thm:Jd0}, \ref{thm:Jdp},
\ref{thm:isotrivial}, \ref{thm:nonisol}, and \ref{thm:nonisod}.

\subsection{}
It is a pleasure to thank Jordan Ellenberg for his stimulating questions about ranks of elliptic curves as well as Brian Conrey, Bill McCallum, and Dinesh Thakur for their help. Special thanks are due to Bjorn Poonen for several incisive remarks and for pointing out that some arguments originally given for elliptic curves apply more generally to higher-dimensional abelian varieties.

\section{Jacobi sums}

\subsection{}
Throughout the paper $p$ will be a rational prime number, ${\mathbb{F}_p}=\mathbb{Z}/p\mathbb{Z}$
will be the prime field of characteristic $p$, and $q=p^f$ will be a power of $p$. Fix an algebraic closure ${\overline{\mathbb{Q}}}$ of $\mathbb{Q}$. All number fields considered will tacitly be assumed to be subfields of ${\overline{\mathbb{Q}}}$.
We denote by $\mu_d$ the group of $d$-th roots of unity in ${\overline{\mathbb{Q}}}$.

Let $\mathfrak{p}$ be a prime of $\mathcal{O}_{\overline{\mathbb{Q}}}$, the ring of integers of ${\overline{\mathbb{Q}}}$,
over $p$. The field $\mathcal{O}_{\overline{\mathbb{Q}}}/\mathfrak{p}$ is an algebraic closure of ${\mathbb{F}_p}$
which we denote by ${\overline{\mathbb{F}}_p}$ and we write ${\mathbb{F}_q}$ for its subfield of cardinality $q$.

Reduction modulo $\mathfrak{p}$ induces an isomorphism between the group of all roots of unity of order prime to $p$ in $\mathcal{O}_{\overline{\mathbb{Q}}}$ and the multiplicative group $(\mathcal{O}_{\overline{\mathbb{Q}}}/\mathfrak{p})^\times={\overline{\mathbb{F}}_p}^\times$. We let
$t:{\overline{\mathbb{F}}_p}^\times\to{\overline{\mathbb{Q}}}^\times$ denote the inverse of this isomorphism. We will use the same letter $t$ for the restriction to any of the finite fields ${\mathbb{F}_q^\times}$. Every character of ${\mathbb{F}_q^\times}$ is a power of $t$.

\subsection{}
Fix a non-trivial additive character $\psi_p:{\mathbb{F}_p}\to{\overline{\mathbb{Q}}}^\times$. For each $q$ we define an additive character $\psi_q$ as
$\psi_q=\psi_p\circ\tr_{{\mathbb{F}_q}/{\mathbb{F}_p}}$.

For each $q$ and each character $\chi$ of ${\mathbb{F}_q^\times}$, we define a Gauss sum
$$G_q(\chi)=-\sum_{x\in{\mathbb{F}_q^\times}}\chi(x)\psi_q(x)\in\mathbb{Q}(\mu_{p(q-1)}).$$
It is well known that $G_q(\chi)=1$ if $\chi$ is the trivial character and that $G_q(\chi)$ is an algebraic integer with absolute value
$q^{1/2}$ in every complex embedding if $\chi\neq1$.

For $d$ prime to $q$, $a\in\mathbb{Z}/d\mathbb{Z}$, and any $q\equiv1\pmod d$ we write
$G_q(a)$ for $G_q(t^{-a(q-1)/d})$ which lies in $\mathbb{Q}(\mu_{pd})$. The analysis leading to Stickelberger's theorem \cite{Washington}*{6.2}
shows that if $\wp$ is the prime of $\mathbb{Q}(\mu_{pd})$ under $\mathfrak{p}$,
$q=p^f$, and $a\not\equiv0\pmod{d}$ then
$$\ord_{\wp}G_q(a)=(p-1)\sum_{j=0}^{f-1}
\left\langle\frac{p^ja}{d}\right\rangle$$
where $\langle x\rangle$ is the fractional part of $x$, i.e.,
$0\le\langle x\rangle<1$ and $x-\langle x\rangle\in\mathbb{Z}$.

\subsection{}\label{ss:JacobiSums}
Fix a positive integer $w$. For each $q$ and each tuple of non-trivial characters $\chi_0,\dots,\chi_{w+1}$ of ${\mathbb{F}_q^\times}$ such that the product $\chi_0\cdots\chi_{w+1}$ is trivial, we define a Jacobi sum
$$J_q(\chi_0,\dots,\chi_{w+1})=\frac1{q-1}
\sum_{\substack{x_0,\dots,x_{w+1}\in{\mathbb{F}}_{p^f}^\times\\x_0+\cdots+x_{w+1}=0}}
\chi_0(x_0)\cdots\chi_{w+1}(x_{w+1})\in\mathbb{Q}(\mu_{q-1}).$$

It is well-known and elementary (see \cite{WeilNS}*{p.~501} for example) that
$$J_q(\chi_0,\dots,\chi_{w+1})=\frac{(-1)^w}q\prod_{i=0}^{w+1}G_q(\chi_i).$$
In particular, the Jacobi sum is an algebraic integer with absolute value $q^{w/2}$ in every complex embedding.

Let $A_{d,w}\subset(\mathbb{Z}/d\mathbb{Z})^{w+2}$ be the set of tuples
${\mathbf{a}}=(a_0,\dots,a_{w+1})$ such that $a_i\neq0$ for all $i$ and $\sum a_i=0$. If ${\mathbf{a}}\in A_{d,w}$ and $q\equiv1\pmod d$, we write $J_q({\mathbf{a}})$
for $J_q(t^{-a_0(q-1)/d},\dots,t^{-a_{w+1}(q-1)/d})$; clearly
$J_q({\mathbf{a}})\in\mathbb{Q}(\mu_d)$. If $\wp$ is the prime of $\mathbb{Q}(\mu_{d})$ under
$\mathfrak{p}$ and $q=p^f$, then
$$\ord_\wp J_q({\mathbf{a}})=\sum_{i=0}^{w+1}\sum_{j=0}^{f-1}
\left\langle\frac{p^ja_i}{d}\right\rangle-f.$$

We write $A'_{d,w}$ for those ${\mathbf{a}}\in A_{d,w}$ such that
$\gcd(d,a_0,\dots,a_{w+1})=1$. Note that if ${\mathbf{a}}\in A_{d,w}$ and if
$e=\gcd(d,a_0,\dots,a_{w+1})$, $d'=d/e$ and
${\mathbf{a}}'=(a_0/e,\dots,a_{w+1}/e)\in A'_{d',w}$ then for any $q\equiv1\pmod d$ we have $J_q({\mathbf{a}})=J_q({\mathbf{a}}')$.

Many of our results on ranks will be based on part (2) of the following theorem about the distribution of Gauss and Jacobi sums.
Roughly speaking, it says that sums involving characters of large order must either have large degree over $\mathbb{Q}$ or have valuation bounded away from 0.

\begin{thm}\label{thm:GaussJacobiBounds}
\hfill\break
\begin{enumerate}
\item \vskip-12pt Fix a real number $\epsilon>0$ and a positive integer $n$. There exists a constant $C_{\epsilon,n}$ depending only on $\epsilon$ and $n$ such that if $d>C_{\epsilon,n}$,
$q=p^f\equiv1\pmod d$, $a\in(\mathbb{Z}/d\mathbb{Z})^\times$, and the degree of
$G_q(a)$ over $\mathbb{Q}(\mu_p)$ is $\le n$, then
$$\left|\frac{\ord_\wp G_q(a)}{(p-1)f}-\frac12\right|<\epsilon.$$
Here $\wp$ is the prime of $\mathbb{Q}(\mu_{pd})$ under $\mathfrak{p}$. Note that
$\ord_\wp(q)=(p-1)f$.
\item Fix a positive integer $n$. There exist constants $C_n$ and
$\epsilon_n>0$ depending only on $n$ such that if $d>C_n$,
$q=p^f\equiv1\pmod d$, $w\ge1$, ${\mathbf{a}}\in A'_{d,w}$, and the degree of
$J_q({\mathbf{a}})$ over $\mathbb{Q}$ is $\le n$, then
$$\frac{\ord_\wp J_q({\mathbf{a}})}{f}>\epsilon_n.$$
Here $\wp$ is the prime of $\mathbb{Q}(\mu_d)$ under $\mathfrak{p}$. Note that
$\ord_\wp(q)=f$.
\end{enumerate}
\end{thm}

\begin{rems}
\hfill\break
\begin{enumerate}
\item \vskip-12pt The constants appearing in the theorem are {\it independent of\/} $p$ and effectively computable.
\item In part (2) of the theorem, we may replace ``the degree of
$J_q({\mathbf{a}})$ over $\mathbb{Q}$ is $\le n$'' with ``the degree of the largest subfield of $\mathbb{Q}(J_q({\mathbf{a}}))$ in which $p$ splits completely is $\le n$'' and similarly in part (1). I do not know whether this has any applications to geometry.
\end{enumerate}
\end{rems}

The theorem is a consequence of Stickelberger's theorem and the following simple estimate.

\begin{prop}\label{prop:BasicEstimate}
Fix a real number $\epsilon>0$ and a positive integer $n$. There exists a constant $C_{\epsilon,n}$ depending only on $\epsilon$ and
$n$ such that if $d>C_{\epsilon,n}$ and $H\subset G=(\mathbb{Z}/d\mathbb{Z})^\times$
is a subgroup of index $\le n$, then for all $a\in G$,
$$\left|\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}
\right\rangle-\frac12\right|< \epsilon.$$
\end{prop}

\begin{proof}
We have
\begin{align*}
A:=\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}\right\rangle
&=\frac{1}{|H|}\sum_{\substack{s=1\\(s,d)=1}}^{d-1}\frac sd\frac1{[G:H]}
\sum_{\chi\in\widehat{G/H}}\chi(sa^{-1})\\
&=\frac12 +\frac1{d\phi(d)}\sum_{1\neq\chi\in\widehat{G/H}}\chi(a^{-1})
\sum_{\substack{s=1\\(s,d)=1}}^{d-1}\chi(s)s
\end{align*}
where $|H|$ denotes the order of $H$, $\widehat{G/H}$ denotes the group of characters of $G/H$ (which we view as characters of $G$
trivial on $H$), and $\phi(d)=|G|$ is Euler's function. Partial summation and the Polya-Vinogradov inequality \cite{Davenport}*{\S23}
show that there is an absolute constant $C$ such that the inner sum above is $<Cd^{3/2}\log d$ and so the quantity $A$ to be estimated satisfies
$$\left|A-\frac12\right|\le\frac{Cnd^{1/2}\log d}{\phi(d)}.$$
Well-known estimates for $\phi(d)$ \cite{HardyWright}*{Thm~327} say that for all $\delta>0$, $\phi(d)/d^{1-\delta}\to\infty$ as
$d\to\infty$ so there is a constant $C_{\epsilon,n}$ depending only on $n$ and $\epsilon $ such that
$$\frac{Cnd^{1/2}\log d}{\phi(d)}< \epsilon$$
whenever $d>C_{\epsilon,n}$. This completes the proof of the proposition.
\end{proof}

\begin{cor}\label{cor:alld}
Given $n$ there exists a constant $\delta_n>0$ depending only on $n$
such that for any $d\ge2$, any $0\neq a\in\mathbb{Z}/d\mathbb{Z}$, and any subgroup
$H\subset G=(\mathbb{Z}/d\mathbb{Z})^\times$ of index $\le n$,
$$\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}\right\rangle>\delta_n.$$
\end{cor}

\begin{proof}
For $0\neq a\in(\mathbb{Z}/d\mathbb{Z})$, set $e=\gcd(a,d)$, $d'=d/e$,
$G'=(\mathbb{Z}/d'\mathbb{Z})^\times$, $a'=a/e$, and $H'=\im(H\to G')$. Then the index of $H'$ in $G'$ is $\le n$ and we have
$$A:=\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}\right\rangle=
\frac{1}{|H'|}\sum_{t\in H'}\left\langle\frac{ta'}{d'}\right\rangle$$
and so we may assume that $\gcd(a,d)=1$, i.e., that $a\in G$.

Given $n$, let $C_{1/4,n}$ be the constant furnished by the proposition for $n$ and $\epsilon =1/4$. If $d>C_{1/4,n}$ then by the proposition, $A>1/4$. On the other hand, there are only finitely many
$d\le C_{1/4,n}$ and for each $d$, only finitely many subgroups
$H\subset(\mathbb{Z}/d\mathbb{Z})^\times$ of index $\le n$. Since $A>0$ for each of these finitely many possibilities, there is a $\delta_n>0$ such that
$A>\delta_n$ for all $d$ and $a$.
\end{proof}

\subsection{Proof of Theorem~\ref{thm:GaussJacobiBounds} (1)}
Given $\epsilon$ and $n$, suppose that $d$, $q=p^f\equiv1\pmod d$, and
$a\in G=(\mathbb{Z}/d\mathbb{Z})^\times\cong\gal(\mathbb{Q}(\mu_{pd})/\mathbb{Q}(\mu_p))$, are such that $G_q(a)\in\mathbb{Q}(\mu_{pd})$ has degree $\le n$ over $\mathbb{Q}(\mu_p)$. Let
$H\subset G$ be the subgroup of $G$ fixing $\mathbb{Q}(\mu_p,G_q(a))$, so that
$H$ has index $\le n$ in $G$. If $\wp$ is the prime of $\mathbb{Q}(\mu_{pd})$
under $\mathfrak{p}$, then for every $t\in H$, we have
$\ord_{\wp^t}(G_q(a))=\ord_{\wp}(G_q(a))$. Therefore,
\begin{align*}
\frac{\ord_{\wp}G_q(a)}{(p-1)f}
&=\frac1{|H|}\sum_{t\in H} \frac{\ord_{\wp^t}G_q(a)}{(p-1)f}\\
&=\frac1{|H|f}\sum_{t\in H}\sum_{j=0}^{f-1}
\left\langle \frac{p^jta}{d}\right\rangle
\end{align*}
where the second equality comes from Stickelberger's theorem. Let
$P$ be the subgroup of $(\mathbb{Z}/d\mathbb{Z})^\times$ generated by $p$ and $HP$
the subgroup generated by $H$ and $P$. The last displayed sum is then equal to
$$\frac{1}{|HP|}\sum_{t\in HP}\left\langle\frac{ta}{d}\right\rangle.$$
Since $H$ has index $\le n$ in $G$, the same is true of $HP$ and so Proposition~\ref{prop:BasicEstimate} shows that if $d>C_{\epsilon,n}$
then
$$\left|\frac{\ord_{\wp}G_q(a)}{(p-1)f}-\frac12\right|<\epsilon$$
as was to be shown.
\qed

\subsection{Proof of Theorem~\ref{thm:GaussJacobiBounds} (2)}
Given ${\mathbf{a}}\in A'_{d,w}$, set $d_i=d/\gcd(d,a_i)$. The following lemma tells us that if $d$ is large then at least two of the $d_i$ are also large.

\begin{subsublemma}
With notation as above, there exists an absolute constant $C$ such that at least two of the $d_i$ are $\ge C\log d$.
\end{subsublemma}

\begin{proof}
If $\ell$ divides $d$ then from the definitions, there are at least two $i$'s such that $\ell$ does not divide $a_i$. Therefore the largest prime power dividing $d$ also divides at least two of the
$d_i$.

To finish we note that Chebyschev's theorem
\cite{HardyWright}*{Thm.~7} implies that the largest prime power dividing $d$ is $\ge C\log d$ for some absolute constant $C$.
Indeed, let $M$ be a positive number, let $p_1,\dots,p_{\pi(M)}$ be the primes less than $M$, and let $p_i^{e_i}$ be the largest power of
$p_i$ less than $M$. If $N=\prod_{i=1}^{\pi(M)}p_i^{e_i}$ then
$$\log N=\sum_{i=1}^{\pi(M)}e_i\log p_i\le\pi(M)\log M\le C'M$$
by Chebyschev. This shows that if $N$ is a product of prime powers less than $M$, then $N\le e^{C'M}$. Therefore the largest prime power dividing $N$ is at least $C\log N$ where $C=1/C'$.
\end{proof}

Now fix $n$ and consider those $q\equiv1\pmod d$ and ${\mathbf{a}}$ such that
$J_q({\mathbf{a}})$ has degree $\le n$ over $\mathbb{Q}$. Let $H\subset G=(\mathbb{Z}/d\mathbb{Z})^\times\cong\gal(\mathbb{Q}(\mu_d)/\mathbb{Q})$ be the subgroup fixing
$\mathbb{Q}(J_q({\mathbf{a}}))$ so that $H$ has index $\le n$ in $G$. Then we have
\begin{align*}
A(q,{\mathbf{a}}):=\frac{\ord_{\wp}J_q({\mathbf{a}})}{f}
&=\frac{1}{|H|}\sum_{t\in H}\frac{\ord_{\wp^t}(J_q({\mathbf{a}}))}{f}\\
&=\frac{1}{|H|}\sum_{t\in H}\frac 1f \left(\sum_{i=0}^{w+1}
\sum_{j=0}^{f-1}\left\langle\frac{tp^ja_i}{d}\right\rangle-f\right)\\
&=\left(\sum_{i=0}^{w+1}\frac{1}{|HP|}
\sum_{t\in HP}\left\langle\frac{ta_i}{d}\right\rangle\right)-1
\end{align*}
where as before $P$ is the subgroup of $(\mathbb{Z}/d\mathbb{Z})^\times$ generated by
$p$ and $HP$ is the subgroup generated by $H$ and $P$. Reindexing
${\mathbf{a}}$, we may assume that $d_0$ and $d_1$ are $\ge C\log d$. Since $H$
has index $\le n$, so does $HP$ and so we get bounds on the inner sums in the last displayed equation. More precisely, by Corollary~\ref{cor:alld}, the inner sum is $>\delta_n$ for
$i=2,\dots,w+1$, and by Proposition~\ref{prop:BasicEstimate}, if $d_0$
and $d_1$ are sufficiently large (so that $C\log d>C_{\epsilon,n}$),
the $i=0$ and $i=1$ terms are $>1/2-\epsilon$. Applying this with
$\epsilon=\delta_n/4\le w\delta_n/4$, we see that for sufficiently large $d$, $A(q,{\mathbf{a}})\ge(w-1/2)\delta_n\ge\delta_n/2$. This completes the proof of part (2) of the theorem. \qed

\section{Fermat Jacobians}

\subsection{}
Let $k$ be an arbitrary field with separable closure $\overline k$.
For each positive integer $d$ not divisible by the characteristic of
$k$ we consider the Fermat curve $F_d$ of degree $d$ over $k$ (the zero locus of $\sum_{i=0}^2x_i^d$ in $\mathbb{P}^2$) and its Jacobian $J_d$.
If $A$ is an abelian variety over $k$, we say that ``$A$ appears in
$J_d$'' if there is a homomorphism of abelian varieties $A\to J_d$
with finite kernel. We say ``$A$ appears in $J_d$ with multiplicity
$m$'' if $m$ is the largest integer such that $A^m$ appears in $J_d$.
The multiplicity with which $A$ appears in $J_d$ obviously depends only on the $k$-isogeny class of $A$.

The following two theorems are the main results of this section.

\begin{thm}\label{thm:Jd0}
Suppose that $k$ is a field of characteristic zero. Then for every positive integer $g$, only finitely many $k$-isogeny classes of abelian varieties of dimension $\le g$ appear in $J_d$ as $d$ varies through all positive integers. If $A$ is an abelian variety over
$k$, then the multiplicity with which $A$ appears in $J_d$ is bounded by a constant depending only on the dimension of $A$.
\end{thm}

If $k$ has characteristic $p$ and $A$ is an abelian variety over $k$,
the $p$-rank of $A$ is by definition the dimension over ${\mathbb{F}_p}$ of the group of $\overline k$-rational $p$-torsion points on $A$. It is known that the $p$-rank lies in the interval $[0,\dim A]$ and that it is invariant under isogeny.

\begin{thm}\label{thm:Jdp}
Suppose that $k$ is a field of characteristic $p>0$. Then for every positive integer $g$, only finitely many $k$-isogeny classes of abelian varieties with positive $p$-rank and dimension $\le g$
appear in $J_d$ as $d$ varies through all positive integers prime to
$p$. If $A$ is an abelian variety over $k$ with positive $p$-rank,
then the multiplicity with which $A$ appears in $J_d$ is bounded by a constant depending only on the dimension of $A$.
\end{thm}

\begin{rems}
\hfill\break
\begin{enumerate}
\item \vskip-12pt We repeat that the constants in the theorems depend only on the dimension $g$. In particular, they are independent of the characteristic of $k$. As will be clear from the proof, they are also effectively computable.
\item Theorem~\ref{thm:Jd0} is already known in a more precise quantitative form by results of Aoki \cite{Aoki}, building on work of Koblitz, Rohrlich, and Shioda. Theorem~\ref{thm:Jdp} may be known to experts but to my knowledge is not in the literature. We will give a very simple proof of Theorem~\ref{thm:Jdp} for $k$
finite and use this to deduce the general case and Theorem~\ref{thm:Jd0}.
\item It is proven in \cite{TS}, and by a different method in
\cite{UlmerR2}, that over a field of characteristic $p$, the multiplicity with which a supersingular elliptic curve appears in
$J_d$ is unbounded as $d$ varies. Thus the last part of Theorem~\ref{thm:Jdp} would be false without the hypothesis of positive $p$-rank. It is not clear what to expect for abelian varieties with $p$-rank zero which are not $\overline k$-isogenous to a product of supersingular elliptic curves.
\end{enumerate}
\end{rems}

The proofs of the theorems will be given in the rest of this section.

\subsection{}\label{ss:oldnew}
If $d'<d$ is a divisor of $d$, then there is a canonical surjective morphism $F_d\to F_{d'}$ ($x_i\mapsto x_i^{d/d'}$) which (because
$F_d\to F_{d'}$ is totally ramified at some place) induces an injection of Jacobians $J_{d'}\hookrightarrow J_d$. We define the {\it old part\/} $J_d^\text{old}$ to be the abelian subvariety of $J_d$
generated by the images of the morphisms $J_{d'}\hookrightarrow J_d$ as $d'$
varies through proper divisors of $d$ and we define the {\it new part\/} $J_d^\text{new}$ of $J_d$ to be the abelian variety over $k$
(well-defined only up to $k$-isogeny) such that $J_d$ is isogenous to
$J_d^\text{new}\times J_d^\text{old}$. It is not hard to check, for example by using the zeta function calculation mentioned in
\ref{ss:FermatZetas} below, that $J_d$ is isogenous to
$\prod_{d'|d}J_{d'}^\text{new}$.

Theorem~\ref{thm:Jd0} therefore follows from the statement that there is a constant $C_g$ depending only on $g$ such that no abelian variety
$A$ of dimension $\le g$ appears in $J_d^\text{new}$ for any $d>C_g$.
Theorem~\ref{thm:Jdp} follows from the same statement with the additional hypotheses that $A$ has positive $p$-rank and $d$ is not divisible by $p$.

\subsection{}\label{ss:algfields}
Given a field $k$, let $\mathbb{F}$ be its prime field and $k_0$ be the algebraic closure of $\mathbb{F}$ in $k$. Then $k_0$ is a perfect field and so the extension $k/k_0$ is regular. The Fermat Jacobian $J_d$ and its new part $J_d^\text{new}$ are defined over $\mathbb{F}$ and so if $A$ is an abelian variety over $k$ which appears in
$J_d^\text{new}\times_{\mathbb{F}} k$ then there is an abelian variety $A_0$
defined over $k_0$ which appears in $J_d^\text{new}\times_{\mathbb{F}} k_0$
and with $A_0\times_{k_0} k\cong A$. (This is an old result of Chow which has been given a detailed modern treatment by Conrad, see
\cite{Conrad}*{3.21}.) Moreover, the abelian variety $A_0$ and the morphism $A_0\to J_d^\text{new}$ are both defined over some finite extension of $\mathbb{F}$. Thus it will suffice to prove the existence of the constants $C_g$ mentioned at the end of Subsection~\ref{ss:oldnew}
(depending only on $g$, not on $k$) for the cases when $k$ is a number field or a finite field.

\subsection{}\label{ss:HondaTate}
Let $k$ be ${\mathbb{F}_q}$, the subfield of ${\overline{\mathbb{F}}_p}=\mathcal{O}_{{\overline{\mathbb{Q}}}}/\mathfrak{p}$ with $q$
elements. A {\it Weil $q$-integer of weight 1\/} is an algebraic integer $\alpha$ whose absolute value in every complex embedding is
$q^{1/2}$. For the rest of this section we will call these simply
{\it Weil numbers\/}.

Honda-Tate theory \cite{TateHT} says that ${\mathbb{F}_q}$-isogeny classes of
${\mathbb{F}_q}$-simple abelian varieties are in bijection with $\gal({\overline{\mathbb{Q}}}/\mathbb{Q})$
orbits of Weil numbers. If $A$ corresponds to $\alpha$, then
$E=\en_{{\mathbb{F}_q}}(A)\otimes\mathbb{Q}$ is a central simple algebra over
$\mathbb{Q}(\alpha)$ whose invariants in the Brauer group of $\mathbb{Q}(\alpha)$ can be calculated in terms of the decomposition of $p$ in $\mathbb{Q}(\alpha)$.
The dimension of $A$ is $(1/2)[E:\mathbb{Q}(\alpha)]^{1/2}[\mathbb{Q}(\alpha):\mathbb{Q}]$ and the eigenvalues of Frobenius on $H^1(A\times{\overline{\mathbb{F}}_q},{\mathbb{Q}_\ell})$ are the conjugates of $\alpha$, each appearing with multiplicity
$[E:\mathbb{Q}(\alpha)]^{1/2}$. The $p$-rank of $A$ is equal to the number of eigenvalues of Frobenius which are units at $\mathfrak{p}$ and so $A$ has positive $p$-rank if and only if some conjugate of $\alpha$ is a unit at $\mathfrak{p}$.

If $C$ is a curve of genus $g$ over ${\mathbb{F}_q}$ and the $Z$-function of $C$
is
$$\frac{\prod_{i=1}^{2g}(1-\alpha_iT)}{(1-T)(1-qT)}$$
then the Weil numbers of the ${\mathbb{F}_q}$-simple factors of the Jacobian $J$
of $C$ are precisely the $\alpha_i$. The multiplicity of $\alpha_i$
in the numerator is the multiplicity of the corresponding $A$ in $J$
up to ${\mathbb{F}_q}$-isogeny times $[E_A:\mathbb{Q}(\alpha)]^{1/2}$.

\subsection{}\label{ss:FermatZetas}
Given a positive integer $d$ and a prime power $q$ such that $q\equiv 1\pmod{d}$ we consider the Fermat curve $F_d$ over ${\mathbb{F}_q}$. By a theorem of Weil \cite{WeilNS}, the $Z$-function of $F_d$ over ${\mathbb{F}_q}$ is
$$\frac{\prod_{{\mathbf{a}}\in A_{d,1}}(1-J_q({\mathbf{a}})T)}{(1-T)(1-qT)}$$
where $A_{d,1}$ was defined in Subsection~\ref{ss:JacobiSums}.

It is clear from Weil's computation of the $Z$-function that the Weil numbers of $J_d^\text{new}$ are precisely the $J_q({\mathbf{a}})$ as ${\mathbf{a}}$ runs through
$$A'_{d,1}=\{{\mathbf{a}}=(a_0,a_1,a_2)\in A_{d,1}|\gcd(d,a_0,a_1,a_2)=1\}.$$

\subsection{The case of finite fields}
We assume $k={\mathbb{F}_q}$ and that $A$ is an abelian variety over $k$ which has positive $p$-rank and dimension $\le g$ and appears in
$J_d^\text{new}$. In this case, the Weil numbers of $A$ are among the Weil numbers of $J_d^\text{new}$. Extending $k$ if necessary, we may assume that $d|(q-1)$ and so the Weil numbers of $J_d^\text{new}$ are the Jacobi sums $J_q({\mathbf{a}})$ where ${\mathbf{a}}\in A'_{d,1}$. By the results recalled in Subsections~\ref{ss:HondaTate} and \ref{ss:FermatZetas} it follows that some $J_q({\mathbf{a}})$ has degree $\le 2g$ over $\mathbb{Q}$ and is a unit at the prime $\mathfrak{p}$. This implies that $d$ is $\le C_{2g}$ where
$C_{2g}$ is the constant appearing in Theorem~\ref{thm:GaussJacobiBounds}(2) for $n=2g$. Therefore no abelian variety of positive $p$-rank and dimension $\le g$ appears in
$J_d^\text{new}$ for large $d$ and this establishes Theorem~\ref{thm:Jdp} for finite fields. The argument in Subsection~\ref{ss:algfields} shows that the theorem also holds for arbitrary fields of positive characteristic.

\subsection{The case of number fields}
Suppose that $A$ is an abelian variety of dimension $\le g$ defined over a number field $k$. Suppose that $d$ is larger than the constant
$C(g)=C_{2g}$ appearing in Theorem~\ref{thm:Jdp} and that $A$ appears in $J_d^\text{new}$. Then for every prime $\wp$ of $k$ where $A$ has good reduction, by Theorem~\ref{thm:Jdp} the reduction $A\times
\mathbb{F}_\wp$ has $p$-rank 0. This would violate the following result,
which appears in \cite{Ogus}*{2.7.1}:

\begin{subsublemma} \textup{(}Katz\textup{)}
If $A$ is an abelian variety over a number field $k$, then for infinitely many primes of $k$, the reduction of $A$ has positive
$p$-rank.
\end{subsublemma}

For the convenience of the reader, we sketch the proof of the lemma.
Choose a prime $\ell$ larger than $2g$. Let $L$ be a finite extension of $k$ such that $\gal({\overline{\mathbb{Q}}}/L)$ acts trivially on the $\ell$-torsion of $A$. If $\wp$ is a prime of $L$ over the rational prime $p$ where the reduction of $A$ has $p$-rank zero, then the trace of the Frobenius at $\wp$ on $H^1(A\times\overline k,{\mathbb{Q}_\ell})$ is an integer
$\equiv0\pmod{p}$ and $\le2g(\N\wp)^{1/2}$. If $\wp$ has absolute degree 1 over $\mathbb{Q}$ (i.e., $\N\wp=p$), and $\sqrt{p}>2g$ then we see that the trace must be zero. On the other hand, since $\gal({\overline{\mathbb{Q}}}/L)$
acts trivially on $\ell$-torsion, the trace must be
$\equiv2g\pmod\ell$. Since $\ell>2g$ this is impossible. The conclusion is that the reduction of $A$ at a prime of absolute degree one over a large $p$ must have positive $p$-rank. Such primes have density one in $L$ and the primes under them in $k$ are an infinite set satisfying the conclusion of the lemma.

We note that a stronger version of this result for abelian varieties over $\mathbb{Q}$ is proven in \cite{BayerGonzalez}*{Prop.~5.1}.

The lemma completes the proof of Theorem~\ref{thm:Jd0} for number fields and, as explained in Subsection~\ref{ss:algfields}, therefore also for arbitrary fields of characteristic zero.

\section{Isotrivial abelian varieties with bounded ranks in
$\hat{\mathbb{Z}}$ or ${\hat{\mathbb{Z}}^{(p)}}$-towers}

\subsection{}
In the rest of the paper we will give examples of abelian varieties with bounded ranks in towers of function fields over various fields
$k$. Before doing so, let us dispense with a trivial situation: if
$A$ is an abelian variety over $k(t)$ with good reduction away from
$0$ and $\infty$ and at worst tame ramification at $0$ and $\infty$,
then for any $d$ prime to the characteristic of $k$, the degree of the conductor of $A$ over $k(t^{1/d})$ is bounded independently of $d$.
Geometric rank bounds then show that the rank of $A$ over $k(t^{1/d})$
is also bounded independently of $d$. Therefore it is only interesting to consider situations where the degree of the conductor grows in the tower under consideration. All our examples below are of this type.

\subsection{}
We review some well-known facts about constant and isotrivial abelian varieties. Let $k$ be any field, let $L$ be the function field of a geometrically irreducible curve $\mathcal{C}$ smooth and proper over $\spec k$, and let $J$ be the Jacobian of $\mathcal{C}$. Let $A_0$ be an abelian variety over $k$ and let $A=A_0\times_kL$. Then it is clear that
$A(L)$, the group of $L$-rational points of $A$, is canonically isomorphic to $\mor_k(\mathcal{C},A_0)$, the group of $k$-scheme morphisms from $\mathcal{C}$ to $A_0$. Moreover, we have an exact sequence
\begin{equation*}
0\to A_0(k)\to\mor_k(\mathcal{C},A_0)\to\Hom_{k\text{-av}}(J,A_0)
\end{equation*}
where a $k$ point of $A_0$ is sent to the constant map with that value and a morphism from $\mathcal{C}$ to $A_0$ is sent to the homomorphism of abelian varieties induced by Albanese functoriality. If $\mathcal{C}$ has a $k$-rational divisor of degree 1 (for example if $k$ is finite) then the last map above is surjective. If $k$ is finitely generated over its prime field, then by the Lang-N\'eron theorem, $A_0(k)$ is finitely generated. (See \cite{Conrad} for a modern treatment of the Lang-N\'eron theorem.) For any $k$, $\Hom_{k\text{-av}}(J,A_0)$ is finitely generated and torsion free. If $A_0$ is $k$-simple, then the rank of $\Hom_{k\text{-av}}(J,A_0)$ is equal to the rank of the endomorphism ring of $A_0$ times the multiplicity with which $A_0$
appears in $J$ up to $k$-isogeny.

\subsection{}
Continuing with the notation of the last subsection, suppose that
$\mathcal{C}$ is hyperelliptic, i.e., we are given a degree 2 morphism
$\mathcal{C}\to\mathbb{P}^1$. Let $A'$ be the twist of $A=A_0\times_k k(t)$ by the quadratic extension $L/k(t)$. Since there are no non-constant morphisms from $\mathbb{P}^1$ to an abelian variety, we have
$A(k(t))=A_0(k)$. Since
\begin{equation*}
A(L)\otimes\mathbb{Q}\cong \left(A(k(t))\otimes\mathbb{Q}\right) \bigoplus
\left(A'(k(t))\otimes\mathbb{Q}\right)
\end{equation*}
we conclude that $A'(k(t))$ has finite rank, bounded above by
\begin{equation}\label{eq:isotrank}
\dim_\mathbb{Q} A'(k(t))\otimes\mathbb{Q}=\dim_\mathbb{Q} \Hom_{k\text{-av}}(J,A_0)\otimes\mathbb{Q}
=\rk_\mathbb{Z} \Hom_{k\text{-av}}(J,A_0)
\end{equation}
with equality when $\mathcal{C}$ has a $k$-rational divisor of degree 1.

\subsection{}
We can now apply the rank formula above and our results about Fermat Jacobians to give examples of bounded ranks in towers. Let $K_1=k(t)$
and for every positive integer $d$ not divisible by the characteristic of $k$, let $K_d=k(t^{1/d})$. If the characteristic of $k$ is not 2,
let $L_1=k(u)$ with $u^2=t-1$; if the characteristic of $k$ is 2, let
$L_1=k(u)$ with $u^2+u=t$. For all $d$ prime to the characteristic of
$k$, let $L_d=L_1K_d=k(t^{1/d},u)$. Note that $L_d$ is the function field of a hyperelliptic curve $\mathcal{C}_d$ over $k$. Using ideas analogous to \cite{UlmerR2}*{\S6}, one checks easily that there is a totally ramified, surjective morphism from a Fermat curve $F_{n}\to\mathcal{C}_d$;
here $n=2d$ if the characteristic of $k$ is not 2 and $n=d$ if the characteristic of $k$ is 2. It follows that the Jacobian of $\mathcal{C}_d$ is an isogeny factor of $J_{n}$. Applying the rank formula
\ref{eq:isotrank} and Theorems~\ref{thm:Jd0} and \ref{thm:Jdp} we have the following.

\begin{thm}\label{thm:isotrivial}
Let $k$ be a field and $A_0$ an abelian variety over $k$. If the characteristic of $k$ is $p>0$, assume that $A_0$ is isogenous to a product of $k$-simple abelian varieties each with positive $p$-rank.
Let $A=A_0\times_k k(t)$ and let $A'$ be the twist of $A$ by the quadratic extension $k(u)/k(t)$ where $u$ satisfies $u^2=t-1$ if the characteristic of $k$ is not 2 and $u^2+u=t$ if the characteristic of $k$ is 2. Then the rank of the Mordell-Weil group $A'(k(t^{1/d}))$ is bounded as $d$ varies through all positive integers relatively prime to the characteristic of $k$.
\end{thm}

\section{Non-isotrivial elliptic curves with bounded ranks in $\mathbb{Z}_\ell$-towers}

\subsection{}
For examples of non-isotrivial elliptic curves with bounded ranks in
$\mathbb{Z}_\ell$ extensions, we consider the curve $E$ discussed in
\cite{UlmerR1} with affine equation
$$y^2+xy=x^3-t$$
over ${\mathbb{F}_p}(t)$.

\bigskip
\begin{thm}~\label{thm:nonisol}
Given $p$ let $S$ be the set of primes $\ell>3$ such that
$p\equiv1\pmod\ell$. If $d$ is a product of powers of primes from
$S$, then the rank of $E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is zero.
\end{thm}

The proof of the theorem will be given in the rest of this section.

\subsection{}
We use the notation of Subsection~\ref{ss:JacobiSums} on Jacobi sums.
Given $p$, $d$ prime to $p$, and ${\mathbf{a}}=(a_0,\dots,a_3)\in A_{d,2}$, we say that ${\mathbf{a}}$ is ``supersingular'' (some authors would say ``pure'')
if for one (and thus every) $q=p^f\equiv1\pmod{d}$ and all
$s\in(\mathbb{Z}/d\mathbb{Z})^\times$ we have
$$\sum_{i=0}^3\sum_{j=0}^{f-1}\left\langle\frac{sp^ja_i}{d}\right\rangle=2f.$$
If ${\mathbf{a}}$ is supersingular, then for every prime $\wp$ of $\mathbb{Q}(\mu_d)$
over $p$, the valuation $\ord_{\wp}J_q({\mathbf{a}})$ is $f$ and this implies that $J_q({\mathbf{a}})$ is a root of unity times $q$; this is the motivation for the terminology ``supersingular''.

\subsection{}
By \cite{UlmerR1}*{6.4 and 7.7}, if $(d,6p)=1$, then the rank of
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is equal to the number of elements
$t\in\mathbb{Z}/d\mathbb{Z}\setminus\{0\}$ such that ${\mathbf{a}}=(t,-6t,2t,3t)$ is supersingular. We are going to show that for suitable $d$ there are no supersingular ${\mathbf{a}}$ of this form by using a descending induction based on the following elementary identity. Suppose that
$a\in\mathbb{Z}/d\mathbb{Z}$, $\ell$ is a prime such that $\ell^2|d$ and $\ell\mathrel{\mathchoice{\not|}{\not|}{\kern-.2em\not\kern.2em|}{\kern-.2em\not\kern.2em|}} a$. Let $H$ be the cyclic subgroup of $(\mathbb{Z}/d\mathbb{Z})^\times$ generated by
$1+d/\ell$. Then we have
$$\sum_{s\in H}\left\langle\frac{sa}{d}\right\rangle
=\left\langle\frac{a}{d/\ell}\right\rangle+\frac{\ell-1}{2}.$$

It follows that if ${\mathbf{a}}=(a_0,\dots,a_3)\in A_{d,2}$, $\ell^2|d$,
$\ell\mathrel{\mathchoice{\not|}{\not|}{\kern-.2em\not\kern.2em|}{\kern-.2em\not\kern.2em|}} a_i$ for all $i$, and if ${\mathbf{a}}$ is supersingular, then its image in $A_{d/\ell}$ is also supersingular. Indeed, we have
$$2f\ell=\sum_{s\in H}\sum_{i=0}^3
\sum_{j=0}^{f-1}\left\langle\frac{sp^ja_i}{d}\right\rangle
=\sum_{i=0}^3\sum_{j=0}^{f-1}
\left\langle\frac{p^ja_i}{d/\ell}\right\rangle+2f(\ell-1)$$
and similarly if $a$ is replaced by $ta$ with $t\in(\mathbb{Z}/d\mathbb{Z})^\times$.

\subsection{}
We can now prove the theorem. Suppose given $p$ and $d$ which is a product of primes in $S$. If the rank of $E({\overline{\mathbb{F}}_p}(t^{1/d}))$ were positive, then we would have a $t\in\mathbb{Z}/d\mathbb{Z}$ such that
${\mathbf{a}}=(t,-6t,2t,3t)$ is supersingular. Without loss of generality we may assume that $t\in(\mathbb{Z}/d\mathbb{Z})^\times$ and then that ${\mathbf{a}}=(1,-6,2,3)$.
Applying the observation of the previous subsection repeatedly, we may
``reduce the level'' and find a $d'$ which is a product of distinct primes from $S$ such that $(1,-6,2,3)\in A_{d',2}$ is supersingular.
But for such a $d'$ we have $f=1$, i.e., $p\equiv1\pmod{d'}$ and with this one easily checks that
$$\sum_{i=0}^3\sum_{j=0}^{f-1}\left\langle\frac{p^ja'_i}{d'}\right\rangle=
\sum_{i=0}^3\left\langle\frac{a'_i}{d'}\right\rangle=1\neq 2f$$
and so we arrive at a contradiction to the assumption that
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ has positive rank. This completes the proof of the Theorem.

\subsection{}
The theorem shows that for any prime $p$ such that $p-1$ is not a power of $2$ times a power of $3$, there is an elliptic curve over
${\mathbb{F}_p}(t)$ with bounded rank in a ${\mathbb{Z}_\ell}$ tower ${\overline{\mathbb{F}}_p}(t^{1/\ell^n})$ for suitable $\ell$. We will prove a stronger result for certain small
$p$ not of this type (namely $p=2, 3, 5, 7$) in the next section and so it seems likely that this kind of statement holds for all $p$.

In the same direction, it seems quite likely that a more refined analysis would show that given $p$, and for $E$ as above, the rank of
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is bounded as $d$ runs through all integers which are products of powers of primes $\ell$ such that no power of $p$ is congruent to $-1$ modulo $\ell$.

Generalizing in another direction, a geometric analysis as in
\cite{UlmerR1}*{\S5} applied to the curves in \cite{UlmerR2}*{\S7}
might allow one to prove a version of Theorem~\ref{thm:nonisol} for higher dimensional abelian varieties.

Finally, we note that it is not hard to deduce from Theorem~\ref{thm:nonisol} that the curve defined over $\mathbb{Q}(t)$ by the equation $y^2+xy=x^3-t$ has bounded rank over ${\overline{\mathbb{Q}}}(t^{1/d})$ as $d$
ranges over all positive integers. We omit the details since similar results were shown by Shioda \cite{Shioda}*{Cor.~9} using closely related techniques.

\section{Non-isotrivial elliptic curves with bounded ranks in ${\hat{\mathbb{Z}}^{(p)}}$-towers}
\subsection{}
We will use completely different techniques, unrelated to Fermat varieties, to give a few examples of non-isotrivial elliptic curves with bounded ranks in towers ${\overline{\mathbb{F}}_p}(t^{1/d})$ as $d$ ranges over all integers prime to $p$.

\begin{thm}\label{thm:nonisod}
If $p\in\{2,3,5,7,11\}$ then there exists an elliptic curve $E$ over
${\mathbb{F}_p}(t)$ with $j(E)\not\in{\mathbb{F}_p}$ such that the rank of
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is zero for all positive integers $d$ prime to
$p$.
\end{thm}

The proof of the theorem, which uses ideas from \cite{Ulmerpd}, will be given in the rest of this section.

\subsection{}
Given an elliptic curve $E$ over ${\mathbb{F}_p}(t)$ with $j(E)\not\in{\mathbb{F}_p}$,
choose a non-zero invariant differential $\omega$ on $E$ and let
$\Delta=\Delta(E,\omega)$ and $A=A(E,\omega)$ be the discriminant and Hasse invariant of $E$; the definition of the latter is reviewed in
\cite{Ulmerpd}*{\S2}. Our assumptions imply that $\Delta$ and $A$ are non-zero elements of ${\mathbb{F}_p}(t)$.

Consider the following conditions on $E$:
\begin{itemize}
\item $E$ has good or multiplicative reduction at $t=0$ and $t=\infty$.
\item At every finite non-zero place of ${\mathbb{F}_p}(t)$, $E$ obtains good reduction over a tamely ramified extension.
\item At every finite non-zero place $v$ of ${\mathbb{F}_p}(t)$, we have
$$\frac{\ord_v(A)}{p-1}-\frac{\ord_v(\Delta)}{12} <\frac{1}{p}.$$
\end{itemize}
Note that the third condition is automatic at places where $E$ has good ordinary reduction, in particular at places where $A$ and
$\Delta$ are units. Note also that if $E$ satisfies these conditions then it continues to satisfy them over the extensions ${\mathbb{F}_q}(t^{1/d})$
for any power $q$ of $p$ and any $d$ prime to $p$.

\subsection{}
It follows from \cite{Ulmerpd}*{Section~3 and the first sentence of Section~6} that an elliptic curve over ${\mathbb{F}_p}(t)$ satisfying the conditions of the previous subsection has rank 0 or 1 over any extension $K={\mathbb{F}_q}(t^{1/d})$. To see this, we consider the Frobenius and Verschiebung isogenies
\begin{equation*}
\xymatrix{E\ar[r]^{Fr}&E^{(p)}\ar[r]^{V}&E}
\end{equation*}
whose composition is multiplication by $p$. Section~3 of
\cite{Ulmerpd} computes the Selmer groups for $Fr$ and $V$ in terms of the reduction types of $E$, $A$, and $\Delta$. Under the conditions of the previous subsection, the results are that $\sel(K,V)=0$ and
$\sel(K,Fr)$ is zero if $E$ has good reduction at 0 or $\infty$ and has order $p$ if $E$ has multiplicative reduction at both 0 and
$\infty$.

We have an exact sequence
$$E^{(p)}(K)\to\sel(K,Fr)\to\sel(K,p)\to\sel(K,V)$$
and so the Selmer group for $p$ is either trivial or of order $p$. In the examples we give below, when $\sel(K,Fr)$ is non-trivial, there is a point of order $p$ in $E^{(p)}(K)$ mapping to a generator of
$\sel(K,Fr)$ and so $\sel(K,p)=0$ and $E(K)$ has rank 0.

\subsection{}
We now give explicit examples of elliptic curves satisfying our conditions.

Suppose $p=2$ and let $E$ be defined by
$$y^2+(t-1)xy+(t-1)^2y=x^3.$$
If $\omega=dx/((t-1)x+(t-1)^2)$, then $A=(t-1)$, $\Delta=t(t-1)^8$,
and $j=(t-1)^4/t$. Standard methods show that $E$ has good, ordinary reduction away from 0, 1, and $\infty$; that $E$ has multiplicative reduction at 0 and $\infty$; and that at $t=1$, $E$ obtains good reduction over an extension with ramification index 3 and the inequality involving $A$ and $\Delta$ is satisfied. The point
$(x,y)=((t-1)^2,(t-1)^3)$ on $E^{(2)}$ has order 2 and maps non-trivially to $\sel(K,Fr)$ and so $\sel(K,2)=0$ for all
$K={\mathbb{F}_q}(t^{1/d})$.

If $p=3$, let $E$ be defined by
$$y^2=x^3+(t-1)^2x^2+t(t-1)^3x.$$
If $\omega=dx/2y$, then $A=(t-1)^2$, $\Delta=-t^2(t-1)^9$, and
$j=-(t-1)^3/t^2$. Standard methods show that $E$ has good, ordinary reduction away from 0, 1, and $\infty$; that $E$ has multiplicative reduction at 0 and $\infty$; and that at $t=1$, $E$ obtains good reduction over an extension with ramification index 4 and the inequality involving $A$ and $\Delta$ is satisfied. The points
$(x,y)=(t^2(t-1)^4,\pm t^2(t-1)^6)$ on $E^{(3)}$ have order 3 and map non-trivially to $\sel(K,Fr)$ and so $\sel(K,3)=0$ for all
$K={\mathbb{F}_q}(t^{1/d})$.

If $p=5$, let $E$ be defined by
$$y^2=x^3+3(t-1)^4x+(t+1)(t-1)^5.$$
If $\omega=dx/2y$, then $A=(t-1)^4$, $\Delta=2t(t-1)^{10}$, and
$j=(t-1)^2/2t$. Standard methods show that $E$ has good, ordinary reduction away from 0, 1, and $\infty$; that $E$ has multiplicative reduction at 0 and $\infty$; and that at $t=1$, $E$ obtains good reduction over an extension with ramification index 6 and the inequality involving $A$ and $\Delta$ is satisfied. The points with
$x$ coordinate $2(t-1)^8(t^2\pm2t-1)$ on $E^{(5)}$ have order 5 and map non-trivially to $\sel(K,Fr)$ and so $\sel(K,5)=0$ for all
$K={\mathbb{F}_q}(t^{1/d})$.

If $p=7$, let $E$ be defined by
$$y^2=x^3+(t-1)(t+1)^3x+5(t-1)(t+1)^5.$$
If $\omega=dx/2y$, then $A=(t-1)(t+1)^5$, $\Delta=2(t-1)^2(t+1)^9$,
and $j=4(t-1)$. Standard methods show that $E$ has good, ordinary reduction away from $\pm1$ and $\infty$; that $E$ has multiplicative reduction at $\infty$; that at $t=1$, $E$ obtains good reduction over an extension with ramification index 6 and the inequality involving
$A$ and $\Delta$ is satisfied; and that at $t=-1$, $E$ obtains good reduction over an extension with ramification index 4 and the inequality involving $A$ and $\Delta$ is satisfied. It follows that
$\sel(K,7)=0$ for all $K={\mathbb{F}_q}(t^{1/d})$.

If $p=11$, let $E$ be defined by
$$y^2=x^3+8(t-1)(t+1)^3x+2(t-1)(t+1)^5.$$
If $\omega=dx/2y$, then $A=(t-1)^2(t+1)^8$, $\Delta=9(t-1)^2(t+1)^9$,
and $j=5(t-1)$. Standard methods show that $E$ has good, ordinary reduction away from $\pm1$ and $\infty$; that $E$ has multiplicative reduction at $\infty$; that at $t=1$, $E$ obtains good reduction over an extension with ramification index 6 and the inequality involving
$A$ and $\Delta$ is satisfied; and that at $t=-1$, $E$ obtains good reduction over an extension with ramification index 4 and the inequality involving $A$ and $\Delta$ is satisfied. It follows that
$\sel(K,11)=0$ for all $K={\mathbb{F}_q}(t^{1/d})$.

\subsection{}
The theory of modular forms modulo $p$ suggests that the strategy employed in this section will not work for large $p$. Nevertheless, I conjecture that for all $p$
there are elliptic curves (indeed, absolutely simple abelian varieties of any dimension) over ${\mathbb{F}_p}(t)$ which have bounded Mordell-Weil ranks in the tower ${\mathbb{F}_q}(t^{1/d})$.

\begin{bibdiv}
\begin{biblist}
[\resetbiblist{Ulm05}]

\bib{Aoki}{article}{
author={Aoki, N.},
title={Simple factors of the Jacobian of a Fermat curve and the Picard number of a product of Fermat curves},
journal={Amer. J. Math.},
volume={113},
date={1991},
pages={779\ndash 833},
}

\bib{BayerGonzalez}{article}{
author={Bayer, P.},
author={Gonz{\'a}lez, J.},
title={On the Hasse-Witt invariants of modular curves},
journal={Experiment. Math.},
volume={6},
date={1997},
pages={57--76},
}

\bib{Conrad}{article}{
author={Conrad, B.},
title={Chow's $K/k$-image and $K/k$-trace, and the Lang-N\'eron theorem},
journal={Enseign. Math. (2)},
volume={52},
date={2006},
pages={37\ndash 108},
}

\bib{Davenport}{book}{
author={Davenport, H.},
title={Multiplicative number theory},
series={Graduate Texts in Mathematics},
volume={74},
publisher={Springer-Verlag},
place={New York},
date={2000},
pages={xiv+177},
}

\bib{Ellenberg}{article}{
author={Ellenberg, J. S.},
title={Selmer groups and Mordell-Weil groups of elliptic curves over towers of function fields},
date={2005},
status={Preprint, to appear in {\it Compositio Mathematica\/}},
label={Ellen}
}

\bib{Fastenberg}{article}{
author={Fastenberg, L. A.},
title={Mordell-Weil groups in procyclic extensions of a function field},
journal={Duke Math. J.},
volume={89},
date={1997},
pages={217\ndash 224},
}

\bib{HardyWright}{book}{
author={Hardy, G. H.},
author={Wright, E. M.},
title={An introduction to the theory of numbers},
publisher={The Clarendon Press Oxford University Press},
place={New York},
date={1979},
pages={xvi+426},
}

\bib{Ogus}{article}{
author={Ogus, A.},
title={Hodge cycles and crystalline cohomology},
pages={357--414},
book={
title={Hodge cycles, motives, and Shimura varieties},
author={Deligne, Pierre},
author={Milne, James S.},
author={Ogus, Arthur},
author={Shih, Kuang-yen},
series={Lecture Notes in Mathematics},
volume={900},
publisher={Springer-Verlag},
place={Berlin},
},
date={1982},
}

\bib{Shioda}{article}{
author={Shioda, T.},
title={An explicit algorithm for computing the Picard number of certain algebraic surfaces},
journal={Amer. J. Math.},
volume={108},
date={1986},
pages={415\ndash 432},
}

\bib{Silverman1}{article}{
author={Silverman, J. H.},
title={A bound for the Mordell-Weil rank of an elliptic surface after a cyclic base extension},
journal={J. Algebraic Geom.},
volume={9},
date={2000},
pages={301\ndash 308},
}

\bib{Silverman2}{article}{
author={Silverman, J. H.},
title={The rank of elliptic surfaces in unramified abelian towers},
journal={J. Reine Angew. Math.},
volume={577},
date={2004},
pages={153\ndash 169},
}

\bib{Stiller}{article}{
author={Stiller, P. F.},
title={The Picard numbers of elliptic surfaces with many symmetries},
journal={Pacific J. Math.},
volume={128},
date={1987},
pages={157\ndash 189},
}

\bib{TS}{article}{
author={Tate, J. T.},
author={Shafarevitch, I. R.},
title={The rank of elliptic curves},
language={Russian},
journal={Dokl. Akad. Nauk SSSR},
volume={175},
date={1967},
pages={770\ndash 773},
}

\bib{TateHT}{article}{
author={Tate, J. T.},
title={Classes d'isog\'enie des vari\'et\'es ab\'eliennes sur un corps fini (d'apr\`es T.~Honda)},
book={
title={S\'eminaire Bourbaki. Vol. 1968/69: Expos\'es 347--363},
series={Lecture Notes in Mathematics, Vol. 179},
publisher={Springer-Verlag},
place={Berlin},
date={1971},
},
pages={95--110},
label={Tat68}
}

\bib{Ulmerpd}{article}{
author={Ulmer, D.},
title={$p$-descent in characteristic $p$},
journal={Duke Math. J.},
volume={62},
date={1991},
pages={237\ndash 265},
}

\bib{UlmerR1}{article}{
author={Ulmer, D.},
title={Elliptic curves with large rank over function fields},
journal={Ann. of Math. (2)},
volume={155},
date={2002},
pages={295\ndash 315},
}

\bib{UlmerR2}{article}{
author={Ulmer, D.},
title={$L$-functions with large analytic rank and abelian varieties with large algebraic rank over function fields},
date={2005},
label={Ulmer},
status={Preprint, to appear in {\it Inventiones Mathematicae\/}},
}

\bib{Washington}{book}{
author={Washington, L. C.},
title={Introduction to cyclotomic fields},
series={Graduate Texts in Mathematics},
volume={83},
publisher={Springer-Verlag},
place={New York},
date={1997},
pages={xiv+487},
}

\bib{WeilNS}{article}{
author={Weil, A.},
title={Numbers of solutions of equations in finite fields},
journal={Bull. Amer. Math. Soc.},
volume={55},
date={1949},
pages={497\ndash 508},
}

\end{biblist}
\end{bibdiv}

\end{document}
\section*{\LARGE\bf Non-Standard Big Bang Nucleosynthesis Scenarios}%
\stepcounter{section}%
\addcontentsline{toc}{section}{Non-Standard Big Bang Nucleosynthesis Scenarios}%
\large\itshape%
K.\ Jedamzik\\\vspace{0.1pt}\\%
Max-Planck-Institut f\"ur Astrophysik, Karl-Schwarzschild-Str.1,
D--85748 Garching\\ %
\normalsize\upshape%
\bigskip

\subsection*{Abstract}

A brief overview of non-standard big bang nucleosynthesis (BBN) scenarios is presented.
Trends and results of the light-element nucleosynthesis in BBN scenarios with small-scale or large-scale inhomogeneity, the presence of antimatter domains, stable or unstable massive neutrinos, neutrino oscillations, neutrino degeneracy, or massive decaying particles are summarized.

\subsection*{Introduction}

Light-element nucleosynthesis during the BBN epoch below cosmic temperatures $T\approx 1$MeV contributes significantly to $^4$He, $^3$He and $^7$Li abundances and likely, to all of the $^2$H abundance observed throughout the universe.
BBN is a freeze-out process from nuclear statistical equilibrium such that light-element abundance yields are sensitively dependent on the cosmic conditions during the BBN era as well as the properties of neutrinos governing the freeze-out process from weak equilibrium.
Calculations of abundance yields in a standard BBN scenario are performed under the assumptions of a universe homogeneous in the baryon-to-photon ratio, with massless neutrinos and vanishing neutrino chemical potentials, and in the absence of massive decaying particles or other degrees of freedom.
For reviews on the physics of standard BBN see \cite{kj:skm93}.
The purpose of this short article is to summarize results and trends in theoretical calculations of non-standard BBN scenarios where one of the above assumptions in standard BBN is relaxed.
This summary is in no way intended to be complete, either in discussing all possible modifications to a standard BBN scenario or in referencing the thousands of articles on non-standard BBN.
I wish to apologize for including only certain key references due to the limited scope of these proceedings and for presenting my personal view of the field of non-standard BBN.
For two excellent reviews on non-standard BBN the reader is referred to \cite{kj:mm93}.
The determination of observationally inferred primordial abundance constraints, which represents possibly the most important branch of the field of big bang nucleosynthesis at present,
will not be discussed here. In what follows, abundance yields in non-standard BBN will either be given in relation to abundance yields in standard BBN or in absolute values and shall be understood as indicative of approximate trends.

\subsection*{Non-Standard BBN}

In the following a list of non-standard BBN scenarios, their respective modifications to standard BBN (hereafter; SBBN),
as well as trends and results in these scenarios are given.

\subsubsection*{Inhomogeneity}

The baryon-to-photon ratio $\eta$ is the one free parameter in SBBN. Any inhomogeneity in this quantity results in modified nucleosynthesis yields which depend on the typical amplitude and spatial separation scale of inhomogeneities.
Substantial changes in the abundance yields only result when $\delta\eta /\eta \, {}^>_{\sim }\, 1$.

\vskip 0.1in
\noindent
{\bf a) Inhomogeneity in the Baryon-to-Photon Ratio on Small Mass Scales:}
\vskip 0.05in

\noindent Fluctuations in $\eta$ which may arise on sub-horizon scales at earlier cosmic epochs as, for example, possibly during a first-order QCD transition or electroweak transition,
result in a highly nonstandard BBN scenario.
The nucleosynthesis in an environment with $\eta$-fluctuations is characterized by coupled nuclear reactions and hydrodynamic processes, such as baryon diffusion and late-time expansion of high-density regions \cite{kj:ahs87}.
Fluctuations in $\eta$ persist down to the onset of BBN provided the mass of an individual high-density region exceeds $10^{-21}M_{\odot}$.
One of the main features of such scenarios is the differential diffusion of neutrons and protons leading to the existence of neutron-
and proton-rich environments. The trend in inhomogeneous BBN is the overabundant production of $^4$He when compared to SBBN at the same average $\eta$; nevertheless, there exists parameter space where less $^4$He than in SBBN is synthesized.
Scenarios which don't overproduce $^4$He typically have high
($^2$H/H) $\sim 1-2\times 10^{-4}$
and high ($^7$Li/H) $\sim 10^{-9} - 10^{-8}$. Such BBN may agree with observational abundance constraints for fractional contributions of the baryon density to the critical density $\Omega_b$ about 2-3 times larger than in SBBN, but only in the seemingly unlikely advent of efficient $^7$Li depletion in population II stars.

\vskip 0.1in
\noindent
{\bf b) Inhomogeneity in the Baryon-to-Photon Ratio on Large Mass Scales:}
\vskip 0.05in

\noindent When the baryonic mass within a typical fluctuation exceeds ($M \, {}^>_{\sim }\, 10^{-12}M_{\odot}$), baryon diffusion and hydrodynamic processes during the BBN era are of no significance such that BBN abundance yields may be given by an average over the SBBN abundance yields of separate regions at different $\eta$. For non-linear fluctuations exceeding the post-recombination Jeans mass $M \, {}^>_{\sim }\, 10^5M_{\odot}$, which may exist in primordial isocurvature baryon (PIB) models for structure formation, early collapse of high-density ($\eta$) regions is anticipated \cite{kj:sm86}. The nucleosynthesis yields of collapsing regions may be excluded from the primordial abundance determination if either dark objects form or significant early star formation in such high-density regions occurs.
If only low-density regions contribute to the observable primordial abundances, characteristic average abundance yields for scenarios designed to possibly agree with observationally inferred primordial abundances are:
($^2$H/H) $\sim 1-3\times 10^{-4}$, ($^{7}$Li/H) $\sim 5\times 10^{-10} - 2\times 10^{-9}$,
and $^4$He mass fraction $Y_p\approx 0.22-0.25$ at a total $\Omega_b \, {}^<_{\sim }\, 0.2$ (i.e. including possible dark objects),
larger than inferred from a SBBN scenario \cite{kj:jf95,kj:cos95}.
One feature of such models is the prediction of fairly large intrinsic spatial variations in the primordial abundances,
which may be observationally tested by ($^2$H/H) determinations in Lyman-limit systems \cite{kj:jf95}.
These models may only agree with observationally inferred abundance limits when there are no fluctuations below $M \, {}^<_{\sim }\, 10^5M_{\odot}$ and collapse efficiencies of high-density regions are large.

\vskip 0.1in
\noindent
{\bf c) Matter/Antimatter Domains:}
\vskip 0.05in

\noindent A distribution of small-scale matter/antimatter domains in baryon-asymmetric universes (i.e. where net $\eta \neq 0$) may result from electroweak baryogenesis scenarios. If the baryon (antibaryon) mass in individual domains is $\, {}^>_{\sim }\, 10^{-21}M_{\odot}$ the BBN process in such scenarios is characterized by differential diffusion of neutrons (antineutrons) and protons (antiprotons)
which causes a preferential annihilation of antimatter on neutrons
\cite{kj:St76}. When annihilation of antimatter occurs before significant $^4$He synthesis ($T\, {}^>_{\sim }\, 80$keV)
but after weak freeze-out ($T\, {}^<_{\sim }\, 1$MeV)
a modest to substantial reduction of $Y_p$ results.
When annihilations occur mainly after $^4$He synthesis the dominant effect is significant production of $^3$He and $^2$H
\cite{kj:betal88}, with $^3$He/$^2$H ratios likely to be in conflict with observational constraints.

\subsubsection*{Non-standard Neutrino Properties}

The BBN process may be approximated as the incorporation of all available neutrons into $^4$He nuclei at a temperature $T\approx 80$keV.
The neutron abundance at this temperature, and hence the final
$^4$He mass fraction $Y_p$, may be increased with respect to a SBBN scenario due to an increased expansion rate of the universe during the BBN era. An increased expansion rate raises the neutron-to-proton ratio (hereafter; n/p) at weak freeze-out and reduces the time for neutron decay to decrease the n/p ratio between weak freeze-out and significant $^4$He synthesis.
We will refer to this effect as the \lq\lq expansion rate effect\rq\rq .
In addition, the n/p ratio at $T\approx 80$keV may be either decreased or increased by introducing additional electron- and/or anti-electron neutrinos into the plasma.
In SBBN it is assumed that the left-handed neutrino and right-handed antineutrino seas of three massless, stable neutrino flavors $\nu_e$, $\nu_{\mu}$,
and $\nu_{\tau}$
are populated and that neutrino chemical potentials do vanish.
Modifications of these assumptions usually result in either the expansion rate effect or additional (anti) electron neutrinos, or both. The principal effect of such modifications is to change the $^4$He abundance, and to a less observationally significant degree the abundances of other light-element isotopes.

\vskip 0.1in
\noindent
{\bf a) Massive, long-lived $\tau$-neutrinos:}
\vskip 0.05in

\noindent Neutrinos are considered massless and long-lived in the context of BBN for neutrino masses $m_{\nu}\, {}^<_{\sim }\, 100$keV and lifetimes $\tau_{\nu}\, {}^>_{\sim }\, 10^3$s
(see b), however, possible photodisintegration). A massive, long-lived
$\tau$-neutrino leads to the expansion rate effect since the contribution to the total energy density from the rest mass of $\tau$-neutrinos continually increases as the universe expands between weak freeze-out and $^4$He synthesis, possibly even resulting in matter domination during the BBN era. BBN with massive, long-lived $\tau$ neutrinos and for experimentally allowed $\nu_{\tau}$-masses therefore results in increased $^4$He and useful limits on the allowed mass of a long-lived $\tau$-neutrino have been derived
\cite{kj:ketal91}.

\vskip 0.1in
\noindent
{\bf b) Massive, unstable $\tau$-neutrinos:}
\vskip 0.05in

\noindent The effects of decaying $\tau$ neutrinos on the light-element nucleosynthesis
\cite{kj:ks82}
sensitively depend on the decay products. One distinguishes between
(i) decay into sterile particles, in particular, particles which interact neither weakly with nucleons interchanging neutrons and protons nor electromagnetically with the ambient plasma
(e.g. $\nu_{\tau}\mapsto \nu_{\mu} + \phi$, where $\phi$ is a weakly interacting scalar),
(ii) decay into sterile and electromagnetically interacting particles
(e.g. $\nu_{\tau}\mapsto \nu_{\mu} + \gamma$),
and (iii) decay into (anti) electron neutrinos and sterile particles (e.g. $\nu_{\tau}\mapsto \nu_{e} + \phi$ )
\cite{kj:dgt94,kj:ketal94}.
For decay channel (i) and $\tau_{\nu} \, {}^>_{\sim }\, 1$s increased $^4$He mass fraction results due to the expansion rate effect which, nevertheless, is weaker than for long-lived $\tau$-neutrinos since the energy of the (massless) decay products redshifts with the expansion of the universe.
In contrast, for life times
$\tau_{\nu_{\tau}} \, {}^<_{\sim }\, 1$s and $m_{\nu_{\tau}}\, {}^>_{\sim }\, 10$MeV it is possible to reduce the $Y_p$
since effectively the distributions of only two neutrino flavors are populated
\cite{kj:kks97}.
Decay channel (ii) would have interesting effects on BBN but is excluded by observations of supernova 1987A for $\tau_{\nu_{\tau}}
\, {}^<_{\sim }\, 10^4$s.
For decay via channel (iii) additional (anti) electron neutrinos are injected into the plasma and their effect depends strongly on the time of injection \cite{kj:ts88}.
When injected early ($\tau_{\nu_{\tau}}\sim 1$s), the net result is a reduction of $Y_p$ \cite{kj:h97}
since weak freeze-out occurs at lower temperatures.
In contrast, when injected late ($\tau_{\nu} \sim 10^2 - 10^3$s) the resulting non-thermal electron neutrinos effect a conversion of protons into neutrons,
yielding higher $Y_p$ and/or higher $^2$H depending on injection time.
It had been suggested that a scenario with late-decaying $\nu_{\tau}$ via channel
(iii) may result in the relaxation of BBN bounds on $\Omega_b$ by a factor up to ten \cite{kj:dgt94}, nevertheless,
this possibility seems to be ruled out now by the current upper laboratory limit on the $\nu_{\tau}$-mass.
For life times $\tau_{\nu_{\tau}}\, {}^>_{\sim }\, 10^3$s, modifications of the light-element abundances after the BBN era by photodisintegration of nuclei may result (cf. radiative decays).

\vskip 0.1in
\noindent
{\bf c) Neutrino oscillations:}
\vskip 0.05in

\noindent Neutrino oscillations may occur when at least one neutrino species has non-vanishing mass and the weak neutrino interaction eigenstates are not mass eigenstates of the Hamiltonian. One distinguishes between
(i) flavor-changing neutrino oscillations
(e.g. $\nu_{e}\longleftrightarrow \nu_{\mu}$) and (ii) active-sterile neutrino oscillations (e.g. $\nu_{e}\longleftrightarrow \nu_{s}$).
Here $\nu_s$ may be either the right-handed component of a $\nu_e$ ($\nu_{\mu}$, $\nu_{\tau}$) Dirac neutrino,
or a fourth family of neutrinos beyond the standard model of electroweak interactions. In the absence of sterile neutrinos and neutrino degeneracy, neutrino oscillations have negligible effect on BBN due to the almost equal number densities of neutrino flavors. When sterile neutrinos exist, neutrino oscillations may result in the population of the sterile neutrino distribution, increasing the energy density, and leading to the expansion rate effect. The increased $Y_p$
has been used to infer limits on the neutrino squared-mass difference --
mixing angle plane \cite{kj:bd91}.
In the presence of large initial lepton number asymmetries
(see neutrino degeneracy) and with sterile neutrinos, it may be possible to reduce $Y_p$
somewhat independently from the detailed initial conditions,
through the dynamic generation of electron (as well as, $\mu$ and $\tau$) neutrino chemical potentials \cite{kj:fv}.

\subsubsection*{Neutrino Degeneracy}

It is possible that the universe has net lepton number.
Positive net cosmic lepton number manifests itself at low temperatures through an excess of neutrinos over antineutrinos. If net lepton number in any of the three families in the standard model is about ten orders of magnitude larger than the net cosmic baryon number, BBN abundance yields are notably affected. Asymmetries between the $\nu_{\mu}$ ($\nu_{\tau}$) and
$\bar{\nu}_{\mu}$ ($\bar{\nu}_{\tau}$)
number densities result in the expansion rate effect only, whereas asymmetries between the $\nu_e$ and $\bar{\nu}_e$ number densities induce a change in the weak freeze-out (n/p) ratio as well.
Since the expansion rate effect leads to increased $^4$He production
$\nu_{\mu}$ ($\nu_{\tau}$) degeneracy may be constrained.
However, one may find combinations of $\nu_{\mu}$, $\nu_{\tau}$, {\it and} $\nu_e$
chemical potentials which are consistent with observational abundance constraints for $\Omega_b$ much larger than that inferred from SBBN \cite{kj:by77}.
Nevertheless, such solutions not only require large chemical potentials but also an asymmetry between the individual chemical potentials of $\nu_e$ and $\nu_{\mu}$ ($\nu_{\tau}$).
Asymmetries between the different flavor degeneracies may be erased in the presence of neutrino oscillations.

\subsubsection*{Massive Decaying Particles}

The out-of-equilibrium decay or annihilation of long-lived particles
($\tau \, {}^>_{\sim }\, 0.1$s), such as light supersymmetric particles, as well as non-thermal particle production by, for example, evaporating primordial black holes or collapsing cosmic string loops, during, or after, the BBN era may significantly alter the BBN nucleosynthesis yields \cite{kj:detal78}.
The decays may be classified according to if they are (i) radiative or
(ii) hadronic, in particular, whether the electromagnetic or strong interactions of the injected non-thermal particles are most relevant.

\vskip 0.1in
\noindent
{\bf a) Radiative decays:}
\vskip 0.05in

\noindent Electromagnetically interacting particles (i.e.$\gamma$, $e^{\pm}$) thermalize quickly in the ambient photon-pair plasma at high temperatures, such that they usually have little effect on BBN other than some heating of the plasma. In contrast, if radiative decays occur at lower temperatures after a conventional BBN epoch, they result in a rapidly developing $\gamma$-$e^{\pm}$ cascade which only subsides when individual $\gamma$-rays of the cascade do not have enough energy to further pair-produce $e^{\pm}$ on the ambient cosmic background radiation. The net result of this cascade is a less rapidly developing $\gamma$-ray background whose properties only depend on the total amount of energy released in electromagnetically interacting particles and the epoch of decay.
At temperatures $T\, {}^<_{\sim }\, 5$keV the most energetic $\gamma$-rays in this background may photodissociate deuterium, at temperatures $T\, {}^<_{\sim }\, 0.5$keV the photodisintegration of
$^4$He becomes possible.
Radiative decays at $T\approx 5$keV may result in a BBN scenario with low $^2$H
{\it and} low $^4$He if an ordinary SBBN scenario at low $\eta$
is followed by an epoch of deuterium photodisintegration \cite{kj:hkm96}.
Radiative decays at lower temperatures may produce significant amounts of $^2$H and $^3$He. Nevertheless, this process is unlikely to be the sole producer of $^2$H due to the resulting observationally disfavored
$^3$He/$^2$H ratio. The possible overproduction of $^3$He and $^2$H by the photodisintegration of $^4$He has been used to place meaningful limits on the amount of non-thermal,
electromagnetically interacting energy released into the cosmic background. These limits may actually be more stringent than comparable limits from the distortion of the cosmic microwave background radiation \cite{kj:setal96}.

\vskip 0.1in
\noindent
{\bf b) Hadronic decays:}
\vskip 0.05in

\noindent The injection of hadrons into the plasma, such as $\pi^{\pm}$'s, $\pi^0$'s and nucleons, may affect light-element nucleosynthesis during, or after, the BBN era by the destruction {\it and} production of nuclei. In general, possible scenarios and reactions are numerous. If charged hadrons are produced with high energies below cosmic temperature $T\, {}^<_{\sim }\, $keV they may cause a cascade leading to the possibility of photodisintegration of nuclei (see radiative decays).
Through charge exchange reactions the release of about ten pions per nucleon at $T\approx 1$MeV results in a significant perturbation of weak freeze-out and increased $Y_p$ \cite{kj:rs88}.
A fraction of only $\sim 10^{-3}$ antinucleons per nucleon may cause overproduction of $^3$He and $^2$H through antinucleon -- $^4$He annihilations.
It is also conceivable that the particle decaying carries baryon number such that the cosmic baryon number is created {\it during} the BBN era.
A well-studied BBN scenario is the injection of high-energy ($\sim 1$GeV) nucleons created in hadronic jets which are produced by the decay of the parent particle
\cite{kj:detal88}.
If released after a conventional BBN era these high-energy nucleons may spall pre-existing $^4$He, thereby producing high-energy $^2$H, $^3$H, and $^3$He as well as neutrons. Such energetic light nuclei may initiate an epoch of non-thermal nucleosynthesis.
It was found that the nucleosynthesis yields from a BBN scenario with hadro-destruction/production and photodisintegration may result in abundance yields independent of $\eta$ for a range in total energy injection, and half-life of the decaying particles. Nevertheless, such scenarios seem to produce $^6$Li in conflict with observational constraints.

\subsubsection*{Other Modifications to BBN}

There are many other variants to a standard big bang nucleosynthesis scenario which have not been mentioned here.
These include anisotropic expansion, variations of fundamental constants,
theories other than general relativity, magnetic fields during BBN,
superconducting cosmic strings during BBN, among others.
The influence of many, but not all, of those scenarios on BBN is due to the expansion rate effect. Studies of such variants are of importance mainly due to the constraints they allow one to derive on the evolution of the early universe.

%
%

%
\setcounter{nref}{\arabic{enumiv}}

\end{document}
\title[Plateau flow]
{Plateau flow\\ or\\ the heat flow for half-harmonic maps}

\begin{abstract}
Using the Millot-Sire interpretation of the half-Laplacian on $S^1$ as the Dirichlet-to-Neumann operator for the Laplace equation on the ball $B$, we devise a classical approach to the heat flow for half-harmonic maps from $S^1$ to a closed target manifold $N\subset\mathbbm R^n$, recently studied by Wettstein, and for arbitrary finite-energy data we obtain a result fully analogous to the classical results for the harmonic map heat flow of surfaces and in similar generality. When $N$ is a smoothly embedded, oriented closed curve $\Gamma\subset\mathbbm R^n$
the half-harmonic map heat flow may be viewed as an alternative gradient flow for the Plateau problem of disc-type minimal surfaces.
\end{abstract}

\section{Background and results}
\subsection{Half-harmonic maps and their heat flow}
Let $N\subset\mathbbm R^n$ be a closed sub-manifold, that is, compact and without boundary.
The concept of a half-harmonic map $u\colon S^1\to N\subset\mathbbm R^n$ was introduced by Da Lio-Rivi\`ere \cite{Da Lio-Riviere-2011}, who together with Martinazzi in
\cite{Da Lio-Martinazzi-Riviere-2015}, Theorem 2.9, also made the interesting observation that the harmonic extension of a half-harmonic map yields a free boundary minimal surface supported by $N$, a fact which also was noticed by Millot-Sire \cite{Millot-Sire-2015},
Remark 4.28.

In his PhD-thesis, Wettstein \cite{Wettstein-2022},\cite{Wettstein-2021a}, \cite{Wettstein-2021b},
recently studied the corresponding heat flow given by the equation
\begin{equation}\label{1.1}
d\pi_N(u)\big(u_t+(-\Delta)^{1/2}u\big)=0\ \hbox{ on } S^1\times[0,\infty[,
\end{equation}
where $u_t=\partial_tu$,
and where $\pi_N\colon N_{\rho}\to N$ is the smooth nearest neighbor projection on a
$\rho$-neighborhood $N_{\rho}$ of the given target manifold to $N$,
and, with the help of a fine analysis of the fractional differential operators involved,
he showed global existence for initial data of small energy.

In their work on fractional Ginzburg-Landau equations and half-harmonic maps Millot-Sire \cite{Millot-Sire-2015} exploit the fact that for any smooth $u\colon S^1\to\mathbbm R^n$
we can represent the half-Laplacian classically in the form
\begin{equation}\label{1.2}
(-\Delta)^{1/2}u=\partial_rU
\end{equation}
where $U\colon B\to\mathbbm R^n$ is the harmonic extension of $u$ to the unit disc $B$
\footnote{The classical formula \eqref{1.2} is a special case of a much more general result, due to Caffarelli-Silvestre \cite{Caffarelli-Silvestre-2007}, who pointed out that many nonlocal problems involving fractional powers of the Laplacian can be related to a local,
possibly degenerate, elliptic equation via a suitable extension of the solution to a half-space.}.
Here, using the identity \eqref{1.2} we are able to remove the smallness assumption in Wettstein's work and show the existence of a ``global'' weak solution to the heat flow \eqref{1.1} for data of arbitrarily large (but finite) energy,
which is defined for all times and smooth away from finitely many ``blow-up points''
where energy concentrates, and whose energy is non-increasing. The solution is unique in this class in exact analogy with the classical result \cite{Struwe-1985} by the author on the harmonic map heat flow for maps from a closed surface to a closed target manifold
$N\subset\mathbbm R^n$; see Theorem \ref{thm1.2} below.

In order to describe our work in more detail, let
\begin{equation*}
H^{1/2}(S^1;N)=\{u\in H^{1/2}(S^1;\mathbbm R^n);\;u(z)\in N\hbox{ for almost every }z\in S^1\}.
\end{equation*}
Interpreting $S^1=\partial B$, where $B=B_1(0;\mathbbm R^2)$ and tacitly identifying a map
$u\in H^{1/2}(S^1;N)$ with its harmonic extension
$U\in H^1(B;\mathbbm R^n)$, for a given function $u_0\in H^{1/2}(S^1;N)$ we then seek to find a family of harmonic functions $u(t)\in H^1(B;\mathbbm R^n)$ with traces $u(t)\in H^{1/2}(S^1;N)$
for $t>0$, solving the equation
\begin{equation}\label{1.3}
d\pi_N(u)\big(u_t+\partial_ru\big)=u_t+d\pi_N(u)\partial_ru=0\ \hbox{ on } S^1\times[0,\infty[,
\end{equation}
with initial data
\begin{equation}\label{1.4}
u|_{t=0}=u_0\in H^{1/2}(S^1;N).
\end{equation}

\subsection{Energy}
The half-harmonic heat flow may be regarded as the heat flow for the half-energy
\begin{equation*}
E_{1/2}(u)=\frac12\int_{S^1}|(-\Delta)^{1/4}u|^2d\phi
\end{equation*}
of a map $u\in H^{1/2}(S^1;N)$. Note that the half-energy of $u$ equals the standard Dirichlet energy
\begin{equation*}
E(u)=\frac12\int_B|\nabla u|^2dz
\end{equation*}
of its harmonic extension $u\in H^1(B;\mathbbm R^n)$. Indeed, integrating by parts we have
\begin{equation}\label{1.5}
\int_B|\nabla u|^2dz=\int_{S^1}u\partial_ru\,d\phi=\int_{S^1}u(-\Delta)^{1/2}u\,d\phi
=\int_{S^1}|(-\Delta)^{1/4}u|^2d\phi,
\end{equation}
where we use the Millot-Sire identity \eqref{1.2} and where the last identity easily follows from the representation of the operators
$(-\Delta)^{1/2}$ and $(-\Delta)^{1/4}$ in Fourier space with symbols $|\xi|$, $\sqrt{|\xi|}$,
respectively, and Parseval's identity.
\footnote{Conversely, via Fourier expansion we also can prove \eqref{1.5} directly.
Computing the first variations of $E$ and $E_{1/2}$, respectively, we then obtain \eqref{1.2}.}
Therefore, in the following for convenience we may always work with the classically defined Dirichlet energy. Moreover, we may interpret the half-harmonic heat flow as the heat flow for the Dirichlet energy in the class of harmonic functions with trace in $H^{1/2}(S^1;N)$;
see Section \ref{Energy} below for details.

\subsection{Results}
Identifying $\mathbbm R^2\cong\mathbbm C$, we denote as $M$ the $3$-dimensional M\"obius group of conformal transformations of the unit disc, given by
\begin{equation*}
M=\{\Phi(z)=e^{i\theta}\frac{z-a}{1-\bar{a}z}\in C^{\infty}(\bar{B};\bar{B}):\
|a|<1,\ \theta\in\mathbbm R\}.
\end{equation*}
Observe that the Dirichlet energy is invariant under conformal transformations, and we have $E(u)=E(u\circ\Phi)$ for any $u\in H^1(B;\mathbbm R^n)$ and any $\Phi\in M$.

For smooth data we then have the following result.

\begin{theorem}\label{thm1.1}
Let $N\subset\mathbbm R^n$ be a closed, smooth sub-manifold of $\mathbbm R^n$, and suppose that the normal bundle $T^{\perp}N$ is parallelizable. Then the following holds:

i) For any smooth $u_0\in H^{1/2}(S^1;N)$ there exists a time $T_0\le\infty$ and a unique smooth solution $u=u(t)$ of \eqref{1.3}, hence of \eqref{1.1}, with data \eqref{1.4}
for $0<t<T_0$.

ii) If $T_0<\infty$, we have concentration in the sense that for some $\delta>0$ and any
$R>0$ there holds
\begin{equation*}
\sup_{z_0\in B,\,0<t<T_0}\int_{B_R(z_0)\cap B}|\nabla u(t)|^2dz\ge\delta,
\end{equation*}
and for suitable $t_k\uparrow T_0$ there exist finitely many points $z_k^{(1)},\dots,z_k^{(i_0)}$
and conformal maps $\Phi_k^{(i)}\in M$ with $z_k^{(i)}\to z^{(i)}\in\bar{B}$ and
$\Phi_k^{(i)}\to\Phi_{\infty}^{(i)}\equiv z^{(i)}$ weakly in $H^1(B)$
such that $u(t_k)\circ\Phi_k^{(i)}\to\bar{u}^{(i)}$ weakly in $H^1(B)$ as $k\to\infty$,
where $\bar{u}^{(i)}$ is non-constant and conformal and satisfies
\begin{equation}\label{1.6}
d\pi_N(\bar{u}^{(i)})\partial_r\bar{u}^{(i)}=0,\, 1\le i\le i_0.
\end{equation}
Moreover, there exists $\delta=\delta(N)>0$ such that $E(\bar{u}^{(i)})\ge\delta$,
and $i_0\le E(u_0)/\delta$.
Finally, $u(t_k)$ smoothly converges to a limit $u_1\in H^{1/2}(S^1;N)$
on $\bar{B}\setminus\{z^{(1)},\dots,z^{(i_0)}\}$.

iii) If $T_0=\infty$, then,
as $t\to\infty$ suitably, $u(t)$ smoothly converges to a half-harmonic limit map away from at most finitely many concentration points where non-constant half-harmonic maps ``bubble off'' as in ii).
\end{theorem}

By the Da Lio-Rivi\`ere interpretation of \eqref{1.6},
the ``bubbles'' $\bar{u}^{(i)}$ as well as the limit $u_{\infty}$ of the flow conformally parametrize minimal surfaces with free boundary on $N$,
meeting $N$ orthogonally along their free boundaries.

The hypothesis regarding the target manifold $N$ in particular is fulfilled if $N$ is a closed, orientable hypersurface of co-dimension $1$ in $\mathbbm R^n$, or if $N$ is a smoothly embedded, closed curve $\Gamma\subset\mathbbm R^n$.

For data in $H^{1/2}(S^1;N)$ the following global existence result holds, which is our main result.

\begin{theorem}\label{thm1.2}
For $N\subset\mathbbm R^n$ as in Theorem \ref{thm1.1} the following holds:
i) For any $u_0\in H^{1/2}(S^1;N)$ there exists a unique global weak solution of
\eqref{1.3} with data \eqref{1.4} as in Definition \ref{def6.3},
whose energy is non-increasing and which is smooth for positive time away from finitely many points in space-time where non-trivial half-harmonic maps ``bubble off'' in the sense of Theorem \ref{thm1.1}.ii).

ii) As $t\to\infty$ suitably, $u(t)$ smoothly converges to a half-harmonic limit map away from at most finitely many concentration points where non-constant half-harmonic maps ``bubble off'' as in Theorem \ref{thm1.1}.iii).
\end{theorem}

Note that uniqueness is only asserted within the class of partially regular weak solutions with non-increasing energy, as in the case of harmonic map heat flow. It would be interesting to find out if the latter condition suffices, as in the work of Freire \cite{Freire-1995},
and, conversely, to explore the possibility of ``backward bubbling'' in \eqref{1.3}, as in the examples of Topping \cite{Topping-2002} for the latter flow.

\subsection{Key features of the proof and related flow equations}
In our approach we uncover and exploit surprising regularity properties of the normal component $d\pi_N^{\perp}(u)\partial_ru$ for the harmonic extension of $u$, likely related to the fractional commutator estimates for the normal projection in the work of Da Lio-Rivi\`ere \cite{Da Lio-Riviere-2011} or the regularity estimates of Da Lio-Pigati \cite{Da Lio-Pigati-2020}, Mazowiecka-Schikorra \cite{Mazowiecka-Schikorra-2018},
and others.

The use of the Dirichlet-to-Neumann map for the harmonic extension $u\colon B\to\mathbbm R^n$ of $u$ instead of the half-Laplacian,
and the simple identity \eqref{3.2} as well as equation \eqref{3.5} allow us to perform the analysis using only local, classically defined operators, avoiding fractional calculus almost entirely.

Note that equation \eqref{1.3} is similar to the equation governing the (scalar) evolution problem for conformal metrics $e^{2u}g_{\mathbbm R^2}$ of prescribed geodesic boundary curvature and vanishing Gauss curvature on the unit disc $B$, studied for instance by Brendle \cite{Brendle-2002} or Gehrig \cite{Gehrig-2020}.
In contrast to the latter flows, due to the presence of the projection operator mapping $u_r$
to its tangent component, the flow \eqref{1.3} at first sight appears to be degenerate.
However, surprisingly, within our framework we are able to obtain similar smoothing properties as in the case of the harmonic map heat flow of surfaces.

A different heat flow associated with half-harmonic maps,
using the half-heat operator $(\partial_t-\Delta)^{1/2}$ instead of
\eqref{1.1}, was suggested by Hyder et al. \cite{Hyder-et-al-2021}, and they obtained global existence of partially regular, but possibly non-unique weak solutions for their flow, with a possibly large singular set of measure zero.

\subsection{Applications to Plateau problem}
In the case when $N$ is a smoothly embedded, oriented closed curve $\Gamma\subset\mathbbm R^n$
the half-harmonic heat flow \eqref{1.3} may furnish an alternative gradient flow for the Plateau problem of minimal surfaces of the type of the disc, which has a long and famous tradition in geometric analysis.

Having been posed by Plateau in the 1890's, Plateau's problem was finally solved independently by Douglas \cite{Douglas-1931} and Rad\'o \cite{Rado-1930}
in 1930/31.
In order to analyse the set of {\it all} minimal surfaces solving the Plateau problem,
including saddle points of the Dirichlet integral, thereby building on Douglas' ideas,
in 1939 Morse-Tompkins \cite{Morse-Tompkins-1939} proposed a critical point theory for Plateau's problem in the sense of Morse \cite{Morse-1937},
attempting to characterize non-minimizing solutions as ``homotopy-critical'' points of Dirichlet's integral.
However, in the 1980's Tromba \cite{Tromba-1984},
\cite{Tromba-1985} pointed out that it was not even clear that all smooth, non-degenerate minimal surfaces would be ``homotopy-critical''
in the sense of Morse-Tompkins \cite{Morse-Tompkins-1939}.
To overcome this problem, Tromba developed a version of degree theory that could be applied in this case and which yielded at least a proof of the ``last'' Morse inequality,
which is an identity for the total degree.

In 1982, finally, this author \cite{Struwe-1984}
recast the Plateau problem as a variational problem on a closed convex set and he was able to develop a version of the Palais-Smale type critical point theory for the problem within this frame-work, which allowed him to obtain all Morse inequalities in a rigorous fashion; see the monograph \cite{Struwe-1988} and the paper by Imbusch-Struwe
\cite{Imbusch-Struwe-1999} for further details. In the papers \cite{Struwe-1986} by this author and \cite{Jost-Struwe-1990} by Jost-Struwe the approach was extended to the case of multiple boundaries and/or higher genus.

A key element of critical point theory for a variational problem is the construction of a pseudo-gradient flow for the problem at hand.
In \cite{Struwe-1984} this was achieved in an ad-hoc way. However, starting with the work of Eells-Sampson \cite{Eells-Sampson-1964} on the harmonic map heat flow, it is now an established approach in geometric analysis to study the
(negative) ($L^2$-)gradient flow related to a variational problem,
similar to the standard heat equation.
For Plateau's problem, such a flow was obtained by Chang-Liu \cite{Chang-Liu-2005}
within the frame-work laid out by Struwe \cite{Struwe-1984}
in the form of a parabolic variational inequality, for which Chang-Liu obtained a solution of class $H^2$ by means of a time-discrete minimization scheme. Rupflin \cite{Rupflin-2017},
Rupflin-Schrecker \cite{Rupflin-Schrecker-2018}
studied the analogous parabolic variational inequality in the case of an annulus,
which again had previously been studied by this author \cite{Struwe-1986} by means of an ad-hoc pseudo-gradient flow.

In view of the much better regularity properties of the flow equation \eqref{1.3} it would be tempting to regard this as the correct definition of the canonical gradient flow for the Plateau problem, but an important issue still needs to be addressed.

\subsection{Monotonicity}
Recall that in the classical Plateau problem $u(t)$ is required to induce a (weakly) monotone parametrization of $\Gamma$ for each $t>0$. Even though it may seem likely that -- at least for curves $\Gamma$ on the boundary of a convex body in $\mathbbm R^3$ -- this Plateau boundary condition will be preserved along the flow \eqref{1.3} whenever it is satisfied initially,
at this moment even for a strictly convex planar curve $\Gamma\subset\mathbbm R^2$ it is not clear whether this actually happens.
However, the results that we obtain also seem to be of interest if we drop the Plateau condition.
In particular, our results motivate the study of smooth minimal surfaces with continuous trace covering only a part of the given boundary curve $\Gamma$; dropping the monotonicity condition also brings the parametric approach to the Plateau problem closer to the approach via geometric measure theory.

\subsection{Plateau flow}
It should be straightforward to extend our results to the case when the disc $B$ is replaced by a surface $\Sigma$ of higher genus with boundary $\partial\Sigma\cong S^1$,
if for given initial data $u_0\in H^{1/2}(S^1;N)$ we consider a family $u=u(t)$
in $H^{1/2}(S^1;N)$ solving the equation \eqref{1.3}, that is,
\begin{equation*}
u_t+d\pi_N(u)\partial_ru=0
\end{equation*}
instead of \eqref{1.1}, where for each time we harmonically extend $u(t)$ to $\Sigma$ and denote as $\partial_ru$ the outward normal derivative of $u$ along $\partial\Sigma$,
as was proposed and analysed by Da Lio-Pigati \cite{Da Lio-Pigati-2020} in the time-independent case.
Similarly, one might study the flow \eqref{1.3} on a domain $\Sigma$ with multiple boundaries;
or one might couple the flow \eqref{1.3} with a corresponding evolution equation for the conformal structure on $\Sigma$, as in the work of Rupflin-Topping \cite{Rupflin-Topping-2019}.
Note that on a general domain $\Sigma$ the flow equations \eqref{1.1} and \eqref{1.3} no longer agree. In order to clearly distinguish the flow equation \eqref{1.3} from the equation \eqref{1.1} defining the half-harmonic map heat flow, we therefore propose to say that \eqref{1.3} defines the ``Plateau flow''.

\subsection{Outline}
After a brief discussion of energy estimates in Section \ref{Energy}, in Section \ref{A regularity estimate} we present the analytic core of the argument for higher regularity in Section \ref{Higher regularity} and for the blow-up analysis,
later presented in Section \ref{Blow-up}.
These tools are also instrumental in proving uniqueness of partially regular weak solutions in Section \ref{Uniqueness}.
The $L^2$-bounds for higher and higher derivatives which we establish in Section \ref{Higher regularity}, assuming that energy does not concentrate,
may be of particular interest. These bounds either concern estimates for $\nabla\partial^k_{\phi}u$ on $B$ or on $\partial B$, and we view the latter bounds as stronger by an order of $1/2$. These bounds may be used interlaced, as we later do in Section \ref{Weak solutions}, to prove uniform smooth estimates, locally in time, for smooth flows with smooth initial data converging in $H^{1/2}(S^1;N)$. Since the latter data are dense in $H^{1/2}(S^1;N)$ we thus not only obtain existence of weak solutions for arbitrary data $u_0\in H^{1/2}(S^1;N)$ but also can show their smoothness for positive time and hence are able to derive Theorem \ref{thm1.2} from Theorem \ref{thm1.1}.
A peculiar feature is that one set of regularity estimates can only be obtained globally, that is on all of $B$, whereas the other set of estimates may be localized using cut-off functions. Similar estimates for a regularized version of
\eqref{1.3} are employed in Section \ref{Local existence} to prove local existence of smooth solutions of \eqref{1.3} for smooth data \eqref{1.4}.
Finally, in Section \ref{Asymptotics} the large-time behavior of smooth solutions to
\eqref{1.3} is discussed, finishing the proof of Theorem \ref{thm1.1}.

\subsection{Notation}
The letter $C$ is used throughout to denote a generic constant, possibly depending on the
``target'' $N$ and the initial energy $E(u_0)$.

Moreover, since $T^{\perp}N$ by assumption is parallelizable and compact, there exists $\rho>0$
such that the representation
\begin{equation*}
T\colon N\times B_{\rho}(0;\mathbbm R^m)\ni(p,y)\to p+\sum_{i=1}^my^i\nu_i(p)\in N_{\rho}
\end{equation*}
of the tubular neighborhood $N_{\rho}=\cup_{p\in N}B_{\rho}(p)$ of $N$ is a diffeomorphism,
where $\nu_1,\dots,\nu_m$ is a suitable smooth orthonormal frame along $N$ and where we let $y=(y^1,\dots,y^m)\in\mathbbm R^m$.
For $q\in N_{\rho}$ then $T^{-1}(q)=(p,h)$ with $p=\pi_N(q)$ defines a (vector-valued)
signed distance function $h=h(q)=(h^1(q),\dots,h^m(q))$ with $h^i(q)=\nu_i(p)\cdot(q-\pi_N(q))$
for each $1\le i\le m$.
Fixing a smooth function $\eta\colon\mathbbm R\to\mathbbm R$ such that $\eta(s)=s$ for $|s|<\rho/2$,
and with $\eta(s)=0$ for $|s|\ge 3\rho/4$, we then let
\begin{equation*}
dist_N(q)=(dist^1_N(q),\dots,dist^m_N(q)),
\end{equation*}
with
\begin{equation*}
dist^i_N(q)=\eta(h^i(q))\hbox{ for }q\in N_{\rho},\ dist^i_N(q)=0\hbox{ else, }1\le i\le m.
\end{equation*}
Then for any smooth $u\in H^{1/2}(S^1;N)$ with harmonic extension $u\in H^1(B;\mathbbm R^n)$ we have
\begin{equation}\label{1.7}
\sum_{i=1}^m\nu_i(u)\partial_rdist^i_N(u)
=\sum_{i=1}^m\nu_i(u)\nu_i(u)\cdot u_r=d\pi_N^{\perp}(u)u_r\ \hbox{ on } \partial B=S^1,
\end{equation}
where for each $p\in N$ we denote as
$d\pi_N^{\perp}(p)=1-d\pi_N(p)\colon\mathbbm R^n\to T_p^{\perp}N$ the orthogonal projection.
In the sequel, we abbreviate
\begin{equation*}
\sum_{i=1}^m\nu_i(u)\nu_i(u)\cdot u_r=:\nu(u)\nu(u)\cdot u_r=\nu(u)\partial_rdist_N(u);
\end{equation*}
moreover, we extend the vector fields $\nu_i$ to the whole ambient space by letting
$\nu_i(q)=\nabla dist_N^i(q)$ for $q\in\mathbbm R^n$, $1\le i\le m$.

Finally, we fix a smooth cut-off function $\varphi\in C_c^{\infty}(B)$ satisfying
$0\le\varphi\le 1$ with $\varphi\equiv 1$ on $B_{1/2}(0)$, and for any $z_0\in B$,
any $0<R<1$ we scale
\begin{equation*}
\varphi_{z_0,R}(z)=\varphi((z-z_0)/R)\in C_c^{\infty}(B_R(z_0)).
\end{equation*}

\section{Energy inequality and first consequences}\label{Energy}
The half-harmonic heat flow may be regarded as the heat flow for the Dirichlet energy in the class $H^{1/2}(S^1;N)$. Indeed, let $u(t)$ be a smooth solution of \eqref{1.3}, \eqref{1.4} for $0<t<T_0$. Then we have the following result.

\begin{lemma}\label{lemma2.1}
For any $0<T<T_0$ there holds
\begin{equation*}
E(u(T))+\int_0^T\int_{\partial B}|u_t|^2d\phi\;dt\le E(u_0).
\end{equation*}
\end{lemma}

\begin{proof} Integrating by parts and using \eqref{1.3} we compute
\begin{equation*}
\begin{split}
\frac{d}{dt}E(u)=\int_B\nabla u&\nabla u_t\,dx
=\int_{\partial B}u_r\cdot u_t\,d\phi\\
&=-\int_{\partial B}|d\pi_N(u)u_r|^2d\phi=-\int_{\partial B}|u_t|^2d\phi
\end{split}
\end{equation*}
for any $0<t<T_0$. The claim follows by integration.
\end{proof}

Moreover, there holds a localized version of this energy inequality.

\begin{lemma}\label{lemma2.2}
There exists a constant $C>0$ such that for any $z_0\in B$, any $0<R<1$, any $\varepsilon>0$, and any
$0<t_0<t_1\le t_0+\varepsilon R<T_0$
there holds
\begin{equation*}
\begin{split}
\int_B|\nabla u(t_1)|^2\varphi_{z_0,R}^2dz
+4\int_{t_0}^{t_1}\int_{\partial B}&|u_t|^2\varphi_{z_0,R}^2d\phi\,dt\\
&\le4\int_B|\nabla u(t_0)|^2\varphi_{z_0,R}^2dz+C\varepsilon E(u_0).
\end{split}
\end{equation*}
\end{lemma}

\begin{proof}
Writing $\varphi=\varphi_{z_0,R}$ for brevity, integrating by parts,
and using Young's inequality, similar to the proof of Lemma \ref{lemma2.1}
for any $0<t<T_0$ we have
\begin{equation}\label{2.1}
\begin{split}
\frac{d}{dt}&\big(\frac12\int_B|\nabla u|^2\varphi^2dz\big)
=\int_{\partial B}u_t\cdot u_r\varphi^2d\phi
-\int_B u_tdiv(\nabla u\varphi^2)dz\\
&=-\int_{\partial B}|d\pi_N(u)u_r|^2\varphi^2d\phi
-2\int_B u_t\nabla u\varphi\nabla\varphi dz\\
&\le-\int_{\partial B}|u_t|^2\varphi^2d\phi
+(8\varepsilon R)^{-1}\int_B|\nabla u|^2\varphi^2dz
+8\varepsilon R\int_B|u_t|^2|\nabla\varphi|^2dz.
\end{split}
\end{equation}
Letting
\begin{equation*}
A=\sup_{t_0<t<t_1}\big(\frac12\int_B|\nabla u(t)|^2\varphi^2dz\big),
\end{equation*}
then upon integration we find
\begin{equation*}
\begin{split}
A&+\int_{t_0}^{t_1}\int_{\partial B}|u_t|^2\varphi^2d\phi\;dt\\
&\le\int_B|\nabla u(t_0)|^2\varphi^2dz+\frac{t_1-t_0}{2\varepsilon R}A
+C\varepsilon R^{-1}\int_{t_0}^{t_1}\int_{B_R(z_0)\cap B}|u_t|^2dz\,dt.
\end{split}
\end{equation*}
But with $u=u(t)$ also $u_t=u_t(t)$ is harmonic for each $t$. Expanding
\begin{equation*}
u_t(re^{i\phi})=\sum_{k\ge 0}a_kr^ke^{ik\phi}
\end{equation*}
in a Fourier series, we see that the map
\begin{equation*}
r\mapsto\int_{\partial B_r(0)}|u_t|^2ds=2\pi\sum_{k\ge 0}|a_k|^2r^{2k+1},
\end{equation*}
with $ds$ denoting the element of length along $\partial B_r(0)$, is non-decreasing.
Thus for any $z_0\in B$, any $0<R<1$, and any $t_0<t<t_1$ there holds
\begin{equation}\label{2.2}
\begin{split}
\int_{B_R(z_0)\cap B}&|u_t|^2dz\le 2R\int_{\partial B}|u_t|^2d\phi,
\end{split}
\end{equation}
and we may use Lemma \ref{lemma2.1} to conclude.
\end{proof}

\section{A regularity estimate}\label{A regularity estimate}
To illustrate the key ideas that later will allow us to prove higher regularity and analyze blow-up of solutions of \eqref{1.3},
we first consider smooth solutions $u\in H^{1/2}(S^1;N)$ of the equation
\begin{equation}\label{3.1}
d\pi_N(u)\partial_ru+f=0\ \hbox{ on }\partial B=S^1,
\end{equation}
where $f\in L^2(S^1)$. We prove the following a-priori estimate, similar to Wettstein \cite{Wettstein-2022}, Lemma 3.4. Note that with the truncated signed distance function $dist_N\colon\mathbbm R^n\to\mathbbm R^m$ we have the orthogonal decomposition
\begin{equation}\label{3.2}
\partial_ru=d\pi_N(u)\partial_ru+d\pi_N^{\perp}(u)\partial_ru
=d\pi_N(u)\partial_ru+\nu(u)\partial_r(dist_N(u))
\end{equation}
on $\partial B=S^1$, where we recall that we use the shorthand notation
\begin{equation*}
\nu(u)\partial_r(dist_N(u))=\sum_{i=1}^m\nu_i(u)\partial_r(dist^i_N(u))
=\sum_{i=1}^m\nu_i(u)\nu_i(u)\cdot\partial_ru
\end{equation*}
and extend
$\nu_i(p)=\nabla dist^i_N(p)$, $p\in\mathbbm R^n$.

\begin{proposition}\label{prop3.1}
There exist constants $C,\delta_0=\delta_0(N)>0$ such that for any smooth solution
$u\in H^{1/2}(S^1;N)$ of \eqref{3.1} with $E(u)\le\delta^2<\delta_0^2$ there holds
\begin{equation}\label{3.3}
\int_{S^1}|\partial_{\phi}u|^2d\phi\le C\|f\|^2_{L^2(S^1)}.
\end{equation}
\end{proposition}

\begin{proof}
Multiplying \eqref{3.2} with $\partial_ru$, we find the Pythagorean identity
\begin{equation}\label{3.4}
|\partial_ru|^2=|d\pi_N(u)\partial_ru|^2+|d\pi^{\perp}_N(u)\partial_ru|^2
=|d\pi_N(u)\partial_ru|^2+|\partial_r(dist_N(u))|^2.
\end{equation}

Note that $dist_N(u)\in H^1_0(B)$; moreover, for each $1\le i\le m$ we have
$\nabla(dist^i_N(u))=\nu_i(u)\cdot\nabla u$, and there holds the equation
\begin{equation}\label{3.5}
\Delta(dist^i_N(u))=div(\nu_i(u)\cdot\nabla u)=\nabla u\cdot d\nu_i(u)\nabla u\ \hbox{ in }B.
\end{equation}
The divergence theorem now gives
\begin{equation*}
\begin{split}
\|\partial_r&(dist_N(u))\|^2_{L^2(S^1)}\\
&=(\nabla(dist_N(u)),\nabla(dist_N(u))_r)_{L^2(B)}
+(\Delta(dist_N(u)),(dist_N(u))_r)_{L^2(B)}\\
&\le C\|\nabla u\|_{L^2(B)}\|\nabla^2(dist_N(u))\|_{L^2(B)}
\le C\delta\|\nabla^2(dist_N(u))\|_{L^2(B)},
\end{split}
\end{equation*}
where the basic $L^2$-theory for the Laplace equation \eqref{3.5} yields the bound
\begin{equation*}
\|\nabla^2(dist_N(u))\|_{L^2(B)}\le C\|\Delta(dist^i_N(u))\|_{L^2(B)}
\le C\|\nabla u\|^2_{L^4(B)}.
\end{equation*}
With Sobolev's embedding $H^{1/2}(B)\hookrightarrow L^4(B)$ we then conclude
\begin{equation*}
\|\partial_r(dist_N(u))\|^2_{L^2(S^1)}\le C\delta\|\nabla u\|^2_{H^{1/2}(B)}.
\end{equation*}

Thus from \eqref{3.4} and \eqref{3.1} we have
\begin{equation}\label{3.6}
\begin{split}
\|\partial_ru\|^2_{L^2(S^1)}&\le\|f\|^2_{L^2(S^1)}+\|\partial_r(dist_N(u))\|^2_{L^2(S^1)}\\
&\le\|f\|^2_{L^2(S^1)}+C\delta\|\nabla u\|^2_{H^{1/2}(B)}.
\end{split}
\end{equation}
But Fourier expansion of the harmonic function $u$ gives
\begin{equation}\label{3.7}
\|\partial_{\phi}u\|^2_{L^2(S^1)}=\|\partial_ru\|^2_{L^2(S^1)}=\frac12\|\nabla u\|^2_{L^2(S^1)}
\end{equation}
as well as the bound
\begin{equation*}
\|\nabla u\|^2_{H^{1/2}(B)}\le C\|\nabla u\|^2_{L^2(S^1)},
\end{equation*}
and from \eqref{3.6} we obtain
\begin{equation*}
\begin{split}
\|\partial_ru\|^2_{L^2(S^1)}&\le\|f\|^2_{L^2(S^1)}+C\delta\|\nabla u\|^2_{H^{1/2}(B)}
\le\|f\|^2_{L^2(S^1)}+C\delta\|\partial_r u\|^2_{L^2(S^1)},
\end{split}
\end{equation*}
which for sufficiently small $\delta>0$ by \eqref{3.7} yields the claim.
\end{proof}

In particular, from Proposition \ref{prop3.1} we obtain a positive energy threshold for non-constant solutions of \eqref{1.6}.

\begin{corollary}\label{cor3.2}
Suppose $u\in H^{1/2}(S^1;N)$ smoothly solves \eqref{1.6}. Then, either $u$ is constant, or
$E(u)\ge\delta_0^2$, with $\delta_0=\delta_0(N)>0$ given by Proposition \ref{prop3.1}.
\end{corollary}

Combining the ideas in the proof of the previous result with ideas from the classical proof of the Courant-Lebesgue lemma in minimal surface theory, we can obtain the following local version of Proposition \ref{prop3.1}.

\begin{proposition}\label{prop3.3}
There exists a constant $\delta>0$ with the following property. Given any smooth solution
$u\in H^{1/2}(S^1;N)$ of \eqref{3.1} with harmonic extension $u\in H^1(B)$,
any $z_0\in\partial B$, and any $0<R\le 1/2$ such that
\begin{equation}\label{3.8}
\int_{B_R(z_0)\cap B}|\nabla u|^2dz<\delta^2,
\end{equation}
with a constant $C=C(R)>0$ there holds
\begin{equation*}
\int_{B_{R^2}(z_0)\cap S^1}|\partial_{\phi}u|^2d\phi\le C\|f\|^2_{L^2(B_R(z_0)\cap S^1)}
+CE(u).
\end{equation*}
\end{proposition}

\begin{proof}
Fix any $z_0\in\partial B$ and $0<R\le 1/2$ such that \eqref{3.8} holds.
For suitable $\rho\in[R^2,R]$, with $s$ denoting arc-length along the curve
$C_{\rho}=\{z_0+\rho e^{i\theta}\in B; \theta\in\mathbbm R\}$
with end-points $z_j=z_0+\rho e^{i\theta_j}=e^{i\phi_j}\in\partial B$, $j=1,2$, we have
\begin{equation*}
\begin{split}
\rho\int_{C_{\rho}}|\nabla u|^2ds
\le 2\inf_{R^2<\rho'<R}\Big(\rho'\int_{C_{\rho'}}|\nabla u|^2ds\Big).
\end{split}
\end{equation*}
We can bound the latter infimum by the average over $\rho\in[R^2,R]$
with respect to the measure with density $1/\rho$ to obtain the bound
\begin{equation}\label{3.9}
\begin{split}
\rho\int_{C_{\rho}}&|\nabla u|^2ds
\le 2\int_{R^2}^R\int_{C_{\rho}}|\nabla u|^2ds\,d\rho\Big/\int_{R^2}^R\frac{d\rho}{\rho}\\
&\le 2\int_B|\nabla u|^2dz\Big/|\log(R)|=4 E(u)/|\log(R)|.
\end{split}
\end{equation}
Let $\Phi_0\colon B\to B$ be the conformal map fixing the circular arc
$C_{\rho}$ and mapping the point $z_0$ to the point $-z_0$, obtained as composition $\Phi_0=\pi_0^{-1}\circ\Psi_0\circ\pi_0$ of stereographic projection
$\pi_0\colon B\to\mathbbm R^2_+$ from the point $-z_0$ and reflection $\Psi_0\colon\mathbbm R^2_+\to\mathbbm R^2_+$
of the upper half-plane $\mathbbm R^2_+$ in the half-circle $\pi_0(C_{\rho})$.
Replacing $u$ by the map $u\circ\Phi_0$ in $B\setminus B_{\rho}(z_0)$
we obtain a piecewise smooth map $v_1\colon B\to\mathbbm R^n$ which is harmonic on
$B\setminus C_{\rho}$ and continuous on all of $B$.
Let $v_0\in H^1(B)$ be harmonic with $w:=v_1-v_0\in H_0^1(B)$.
Note that by the variational characterization of harmonic functions and conformal invariance of the Dirichlet integral we have
\begin{equation}\label{3.10}
E(v_0)\le E(v_1)\le\int_{B_R(z_0)\cap B}|\nabla u|^2dz\le\delta^2.
\end{equation}
Moreover, for any smooth $\varphi\in H_0^1(B)$ by \eqref{3.9} we can estimate
\begin{equation*}
\begin{split}
\big|\int_B&\nabla w\nabla\varphi dz\big|=\big|\int_B\nabla v_1\nabla\varphi dz\big|
=\big|\int_{C_{\rho}}[\partial_{\nu}v_1]\varphi ds\big|\\
&\le\Big(\int_{C_{\rho}}|\nabla u|^2ds\Big)^{1/2}
\Big(\int_{C_{\rho}}|\varphi|^2ds\Big)^{1/2}
\le C(R)E(u)^{1/2}\|\varphi\|_{H^{1/2}(B)},
\end{split}
\end{equation*}
where $[\partial_{\nu}v_1]$ denotes the difference of the outer and inner normal derivatives of $v_1$ along $C_{\rho}$.
Thus we have $\Delta w\in H^{-1/2}(B)$, and the basic $L^2$-theory for the Laplace equation gives $w\in H^{3/2}\cap H_0^1(B)$ with
\begin{equation*}
\begin{split}
\|w\|_{H^{3/2}(B)}
\le\sup_{\varphi\in H_0^1(B),\|\varphi\|_{H^{1/2}(B)}\le 1}
\big(\int_B\nabla w\nabla\varphi dz\big)\le C(R)E(u)^{1/2}
\end{split}
\end{equation*}
and then also
\begin{equation}\label{3.11}
\|\partial_rw\|^2_{L^2(S^1)}\le C\|w\|^2_{H^{3/2}(B)}\le C(R)E(u).
\end{equation}

In view of \eqref{3.10}, for sufficiently small $\delta>0$
from Proposition \ref{prop3.1} we obtain the estimate
\begin{equation}\label{3.12}
\|\partial_{\phi}v_0\|^2_{L^2(S^1)}\le C\|d\pi_N(v_0)\partial_rv_0\|^2_{L^2(S^1)}.
\end{equation}
Observe that since $v_0=v_1$ on $\partial B=S^1$ and since we also have $v_1=u$ on
$B\cap B_{\rho}(z_0)$, $v_1=u\circ\Phi_0$ on $B\setminus B_{\rho}(z_0)$,
respectively, we can bound
\begin{equation*}
\begin{split}
\|d\pi_N(v_0)\partial_rv_0\|^2_{L^2(S^1)}&=\|d\pi_N(v_1)\partial_rv_0\|^2_{L^2(S^1)}\\
&\le 2\|d\pi_N(v_1)\partial_rv_1\|^2_{L^2(S^1)}+2\|\partial_rw\|^2_{L^2(S^1)}
\end{split}
\end{equation*}
and
\begin{equation*}
\|d\pi_N(v_1)\partial_rv_1\|^2_{L^2(S^1)}
\le C(R)\|d\pi_N(u)\partial_ru\|^2_{L^2(S^1\cap B_{\rho}(z_0))}.
\end{equation*}
Thus from \eqref{3.11} we obtain
\begin{equation*}
\begin{split}
\|d\pi_N(v_0)\partial_rv_0\|^2_{L^2(S^1)}
&\le C(R)\|d\pi_N(u)\partial_ru\|^2_{L^2(S^1\cap B_{\rho}(z_0))}
+C\|\partial_rw\|^2_{L^2(S^1)}\\
&\le C(R)\|f\|^2_{L^2(S^1\cap B_{\rho}(z_0))}+C(R)E(u),
\end{split}
\end{equation*}
and from \eqref{3.12} there results the bound
\begin{equation*}
\begin{split}
\|\partial_{\phi}&u\|^2_{L^2(S^1\cap B_{\rho}(z_0))}
=\|\partial_{\phi}v_0\|^2_{L^2(S^1\cap B_{\rho}(z_0))}
\le\|\partial_{\phi}v_0\|^2_{L^2(S^1)}\\
&\le C\|d\pi_N(v_0)\partial_rv_0\|^2_{L^2(S^1)}
\le C(R)\|f\|^2_{L^2(S^1\cap B_R(z_0))}+C(R)E(u),
\end{split}
\end{equation*}
as claimed.
\end{proof}

The local estimate Proposition \ref{prop3.3} also implies the following global bound.

\begin{proposition}\label{prop3.4}
There exists a constant $\delta>0$ with the following property. Given any smooth solution
$u\in H^{1/2}(S^1;N)$ of \eqref{3.1}, any $0<R\le 1/2$ with
\begin{equation}\label{3.13}
\sup_{z_0\in B}\int_{B_R(z_0)\cap B}|\nabla u|^2dz<\delta^2,
\end{equation}
there holds
\begin{equation*}
\int_{S^1}|\partial_{\phi}u|^2d\phi\le C(R)\|f\|^2_{L^2(S^1)}
+C(R)E(u).
\end{equation*}
\end{proposition}

\begin{proof}
Covering $\partial B$ with balls $B_{R^2}(z_i)$, $1\le i\le i_0$, from Proposition \ref{prop3.3} we obtain the claim.
\end{proof}

\begin{remark}\label{rem3.5}
The proofs of the above propositions only require $u\in H^1(S^1;N)$ with harmonic extension
$u\in H^{3/2}(B)$.
\end{remark}

\section{Higher regularity}\label{Higher regularity}
Again let $u(t)$ be a smooth solution of the half-harmonic heat flow \eqref{1.3}
for $0<t<T_0$ with smooth initial data \eqref{1.4}. We show that as long as the flow does not concentrate energy in the sense of Theorem \ref{thm1.1}.ii) the solution remains smooth and can be a-priori bounded in any $H^k$-norm in terms of the data.

\subsection{$H^2$-bound}
In a first step we show an $L^2$-bound in space-time for the second derivatives of our solution to the flow \eqref{1.3}. Recall that by harmonicity,
writing $u=u(t)$, $\partial_{\phi}u=u_{\phi}$, and so on, for any $0<t<T_0$ we have
\eqref{3.7}, that is,
\begin{equation*}
\int_{\partial B}|u_{\phi}|^2d\phi=\int_{\partial B}|u_r|^2d\phi,
\end{equation*}
as Fourier expansion shows, with similar identities for partial derivatives of $u$
of higher order. Indeed, writing
\begin{equation}\label{4.1}
\Delta u=\frac{1}{r}(ru_r)_r+\frac{1}{r^2}u_{\phi\phi}
\end{equation}
we see that also $\partial_{\phi}^ju$ and then also $\nabla^{k-j}\partial_{\phi}^ju$
is harmonic for any $j\le k$ in $\mathbbm N_0$, where $\nabla u=(u_x,u_y)$ in Euclidean coordinates $z=x+iy$. Thus by induction we obtain
\begin{equation}\label{4.2}
\int_{\partial B}|\nabla^ku|^2d\phi=2\int_{\partial B}|\nabla^{k-1}u_{\phi}|^2d\phi
=\dots=2^k\int_{\partial B}|\partial_{\phi}^ku|^2d\phi
\end{equation}
for any $k\in\mathbbm N$.
Similarly, for any $1/4<r<1$ with uniform constants $C>0$ we have
\begin{equation*}
\int_{\partial B_r(0)}|\nabla^ku|^2dz
\le C\int_{\partial B_r(0)}|\nabla^{k-1}u_{\phi}|^2dz
\le\dots\le C\int_{\partial B_r(0)}|\partial_{\phi}^ku|^2dz.
\end{equation*}
Integrating, and using the mean value property of harmonic functions together with \eqref{4.2}
to bound
\begin{equation*}
\sup_{B_{1/4}(0)}|\nabla^ku|^2
\le C\int_{B\setminus B_{1/4}(0)}|\nabla^ku|^2dz\le C\int_B|\nabla\partial_{\phi}^{k-1}u|^2dz,
\end{equation*}
in particular, for any $k\in\mathbbm N$ we have the bound
\begin{equation}\label{4.3}
\int_B|\nabla^ku|^2dz\le C\int_B|\nabla\partial_{\phi}^{k-1}u|^2dz
\end{equation}
with an absolute constant $C>0$.

The following lemma is strongly reminiscent of analogous results for the harmonic map heat flow in two space dimensions.

\begin{lemma}\label{lemma4.1}
With a constant $C>0$ depending only on $N$ there holds
\begin{equation*}
\frac{d}{dt}\big(\int_{\partial B}|u_{\phi}|^2d\phi\big)+\int_B|\nabla u_{\phi}|^2dz
\le C\int_B|\nabla u|^2|u_{\phi}|^2dz.
\end{equation*}
\end{lemma}

\begin{proof}
Writing $d\pi_N(u)=1-d\pi^{\perp}_N(u)$ with
\begin{equation*}
d\pi^{\perp}_N(u)X=\nu(u)\nu(u)\cdot X=\sum_{i=1}^m\nu_i(u)\nu_i(u)\cdot X
\end{equation*}
for any $X\in\mathbbm R^n$, we compute
\begin{equation*}
\begin{split}
\frac12\frac{d}{dt}&\big(\int_{\partial B}|u_{\phi}|^2d\phi\big)
=\int_{\partial B}u_{\phi}\cdot u_{\phi,t}d\phi
=-\int_{\partial B}u_{\phi\phi}\cdot u_td\phi\\
&=\int_{\partial B}u_{\phi\phi}\cdot d\pi_N(u)u_rd\phi
=-\int_{\partial B}\big(u_{\phi}\cdot u_{r\phi}
-u_{\phi}\cdot\partial_{\phi}(\nu(u)\,\nu(u)\cdot u_r)\big)d\phi\\
&=-\frac12\int_{\partial B}\partial_r(|u_{\phi}|^2)d\phi
-\int_{\partial B}u_{\phi}\cdot d\nu(u)u_{\phi}\,\nu(u)\cdot u_rd\phi,
\end{split}
\end{equation*}
where we use orthogonality $u_{\phi}\cdot\nu_i(u)=0$ on $\partial B$, $1\le i\le m$,
in the last step.
But $u_{\phi}$ is harmonic. So with $\Delta|u_{\phi}|^2=2|\nabla u_{\phi}|^2$,
from Gauss' theorem we obtain
\begin{equation*}
\frac12\int_{\partial B}\partial_r(|u_{\phi}|^2)d\phi=\int_B|\nabla u_{\phi}|^2dz.
\end{equation*}
On the other hand, by Young's inequality we can estimate
\begin{equation*}
\begin{split}
\int_{\partial B}&u_r\cdot\nu(u)\,u_{\phi}\cdot d\nu(u)u_{\phi}d\phi
=\int_B\nabla u\cdot\nabla\big(\nu(u)\,u_{\phi}\cdot d\nu(u)u_{\phi}\big)dz\\
&\le C\int_B|\nabla u_{\phi}||\nabla u||u_{\phi}|dz
+C\int_B|\nabla u|^2|u_{\phi}|^2dz\\
&\le\frac12\int_B|\nabla u_{\phi}|^2dz
+C\int_B|\nabla u|^2|u_{\phi}|^2dz,
\end{split}
\end{equation*}
and our claim follows.
\end{proof}

Combining the previous result with a quantitative bound for the concentration of energy, we obtain a space-time bound for the second derivatives of $u$.
Note that since $u$ is smooth by assumption,
for any $\delta>0$, any $T<T_0$ there exists a number $R=R(T,u)>0$ such that
\begin{equation}\label{4.4}
\sup_{z_0\in B,\,0<t<T}\int_{B_R(z_0)\cap B}|\nabla u(t)|^2dz<\delta.
\end{equation}

\begin{proposition}\label{prop4.2}
There exist constants $\delta=\delta(N)>0$ and $C>0$ such that for any $T<T_0$ with
$R>0$ as in \eqref{4.4} there holds
\begin{equation}\label{4.5}
\begin{split}
\sup_{0<t<T}\int_{\partial B}|u_{\phi}(t)|^2d\phi
&+\int_0^T\int_B|\nabla u_{\phi}|^2dx\,dt\\
&\le C\int_{\partial B}|u_{0,\phi}|^2d\phi+CTR^{-2}E(u_0).
\end{split}
\end{equation}
\end{proposition}

\begin{proof}
For given $T<T_0$ and $\delta>0$ to be determined we fix $R>0$ such that \eqref{4.4} holds. Let $B_{R/2}(z_i)$, $1\le i\le i_0$,
be a cover of $B$ such that any point $z_0\in B$ belongs to at most $L$ of the balls
$B_R(z_i)$, where $L\in\mathbbm N$ is independent of $R>0$. We then split
\begin{equation*}
\int_B|\nabla u|^2|u_{\phi}|^2dz
\le\sum_{i=1}^{i_0}\int_{B_{R/2}(z_i)}|\nabla u|^4dz
\le\sum_{i=1}^{i_0}\int_B|\nabla(u\varphi_{z_i,R})|^4dz.
\end{equation*}
Using the multiplicative inequality \eqref{A.2} in the Appendix for each $i$ we can bound
\begin{equation*}
\int_B|\nabla(u\varphi_{z_i,R})|^4dz
\le C\delta\int_{B_R(z_i)}\big(|\nabla^2u|^2+R^{-2}|\nabla u|^2\big)dz.
\end{equation*}
Summing over $1\le i\le i_0$, we thus obtain the bound
\begin{equation*}
\begin{split}
\int_B|\nabla u|^2|u_{\phi}|^2dz
&\le CL\delta\int_B|\nabla^2u|^2dz+CL\delta R^{-2}E(u)\\
&\le CL\delta\int_B|\nabla u_{\phi}|^2dz+CL\delta R^{-2}E(u_0),
\end{split}
\end{equation*}
and for sufficiently small $\delta>0$ from Lemma \ref{lemma4.1} we obtain the claim.
\end{proof}

With the help of Proposition \ref{prop4.2} we can now bound $u$ in $H^2(B)$ also uniformly in time. For this, we first note the following estimate, which also will be useful later for bounding higher order derivatives.

\begin{lemma}\label{lemma4.3}
For any $k\in\mathbbm N$, with a constant $C>0$ depending only on $k$ and $N$,
for the solution $u=u(t)$ to \eqref{1.3}, \eqref{1.4} for any $0<t<T_0$ there holds
\begin{equation*}
\begin{split}
\frac{d}{dt}\big(\|\nabla\partial^k_{\phi}&u\|^2_{L^2(B)}\big)
+\|\partial^k_{\phi}u_r\|^2_{L^2(S^1)}\\
&\le C\sum_{1\le j_i\le k+1,\,\Sigma_ij_i\le k+2}\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}.
\end{split}
\end{equation*}
\end{lemma}

\begin{proof}
For any $k\in\mathbbm N$ we use harmonicity of $\partial^{2k}_{\phi}u$ to compute
\begin{equation}\label{4.6}
\begin{split}
\frac12&\frac{d}{dt}\big(\|\nabla\partial^k_{\phi}u\|^2_{L^2(B)}\big)
=(-1)^k\int_B\nabla\partial^{2k}_{\phi}u\nabla u_t\;dx\\
&=(-1)^k(\partial^{2k}_{\phi}u_r,u_t)_{L^2(S^1)}
=(-1)^{k+1}(\partial^{2k}_{\phi}u_r,d\pi_N(u)u_r)_{L^2(S^1)}\\
&=-(\partial^k_{\phi}u_r,\partial^k_{\phi}u_r)_{L^2(S^1)}
+(\partial^k_{\phi}u_r,\partial^k_{\phi}(\nu(u)\,\nu(u)\cdot u_r))_{L^2(S^1)}\\
&=-\|\partial^k_{\phi}u_r\|^2_{L^2(S^1)}+I,
\end{split}
\end{equation}
where we split $I=\sum_{j=0}^k\Big({k\atop j}\Big)I_j$ with
\begin{equation*}
\begin{split}
I_j&=(\partial^k_{\phi}u_r,\partial^j_{\phi}(\nu(u)\,\nu(u))
\partial^{k-j}_{\phi}u_r)_{L^2(S^1)}\\
&=(\nabla\partial^k_{\phi}u,\nabla(\partial^j_{\phi}(\nu(u)\nu(u))
\cdot\partial^{k-j}_{\phi}u_r))_{L^2(B)}.
\end{split}
\end{equation*}
Hence for any $1\le j\le k$ we can bound
\begin{equation*}
\begin{split}
|I_j|&\le C\sum_{0\le i\le j}\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\|\nabla\partial^{j-i}_{\phi}\nu(u)\partial^{i}_{\phi}\nu(u)
\partial^{k-j}_{\phi}u_r\|_{L^2(B)}\\
&+C\sum_{0\le i\le j}\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\|\partial^{j-i}_{\phi}\nu(u)\partial^i_{\phi}\nu(u)
\nabla\partial^{k-j}_{\phi}u_r\|_{L^2(B)}\\
&\le C\sum_{1\le j_i\le k+1,\, \Sigma_ij_i=k+2}\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\|\Pi_i\nabla^{j_i}u\|_{L^2(B)},
\end{split}
\end{equation*}
as claimed. It remains to bound the term
$I_0=\|\partial^k_{\phi}u_r\cdot\nu(u)\|^2_{L^2(S^1)}$.
With the signed distance function we can express
\begin{equation*}
\nu(u)\cdot u_{\phi r}=\big(\nu(u)\cdot u_r\big)_{\phi}-u_r\cdot d\nu(u)u_{\phi}
=(dist_N(u))_{\phi r}-u_r\cdot d\nu(u)u_{\phi},
\end{equation*}
so that
\begin{equation*}
\begin{split}
I_0=\|\partial^k_{\phi}u_r&\cdot\nu(u)\|^2_{L^2(S^1)}
=\big(\partial^k_{\phi}u_r\cdot\nu(u),\partial^k_{\phi}(dist_N(u))_r\big)_{L^2(S^1)}+II\\
&=\big(\nabla\partial^k_{\phi}u,
\nabla\big(\nu(u)\partial^k_{\phi}(dist_N(u))_r\big)\big)_{L^2(B)}+II,
\end{split}
\end{equation*}
where all terms in $II$ can be dealt with as in the case $1\le j\le k$. Finally,
we have
\begin{equation*}
\begin{split}
\big(\nabla&\partial^k_{\phi}u,
\nabla\big(\nu(u)\partial^k_{\phi}(dist_N(u))_r\big)\big)_{L^2(B)}\\
&\le\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\big(\|\nabla^2\partial^k_{\phi}(dist_N(u))\|_{L^2(B)}
+\|\nabla\nu(u)\partial^k_{\phi}(dist_N(u))_r\|_{L^2(B)}\big).
\end{split}
\end{equation*}
But by the chain rule we can bound
\begin{equation*}
\begin{split}
\|\nabla\nu(u)\partial^k_{\phi}&(dist_N(u))_r\|_{L^2(B)}
\le C\|\nabla u\nabla^{k+1}(dist_N(u))\|_{L^2(B)}\\
&\le C\sum_{1\le j_i\le k+1,\,\Sigma_ij_i=k+2}\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}.
\end{split}
\end{equation*}
Moreover, by \eqref{3.5} and elliptic regularity theory, there holds
\begin{equation*}
\begin{split}
\|\nabla^{k+2}&(dist_N(u))\|^2_{L^2(B)}\le C\|\Delta(dist_N(u))\|^2_{H^k(B)}
\le C\|\nabla u\cdot d\nu_i(u)\nabla u\|^2_{H^k(B)}\\
&\le C\sum_{1\le j_i\le k+1,\,\Sigma_ij_i\le k+2}\|\Pi_i\nabla^{j_i}u\|_{L^2(B)},
\end{split}
\end{equation*}
which gives the claim.
\end{proof}

For $k=1$, from Proposition \ref{prop4.2} we now easily derive a uniform $L^2$-bound for the second derivatives of the flow.

\begin{proposition}\label{prop4.4}
For any smooth $u_0\in H^{1/2}(S^1;N)$ and any $T<T_0$ with $R>0$ as in Proposition \ref{prop4.2} with a constant $C_1=C_1(T,R,u_0)>0$ depending on the right hand side of \eqref{4.5} there holds
\begin{equation*}
\begin{split}
\sup_{0<t<T}&\int_B|\nabla u_{\phi}(t)|^2dz
+\int_0^{T}\int_{\partial B}|u_{\phi r}|^2d\phi\,dt
\le C_1\int_B|\nabla u_{0,\phi}|^2dz+C_1.
\end{split}
\end{equation*}
\end{proposition}

\begin{proof}
For $k=1$ by Lemma \ref{lemma4.3} we need to bound the term
\begin{equation*}
\begin{split}
J=\sum_{1\le j_i\le 2,\, \Sigma_ij_i\le 3}\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}
\le C\||\nabla^2 u||\nabla u|+|\nabla u|^3\|_{L^2(B)}+J_1,
\end{split}
\end{equation*}
where $J_1$ contains all terms of lower order.
By the maximum principle and Sobolev's embedding
$H^1(\partial B)\hookrightarrow L^{\infty}(\partial B)$ we can estimate
\begin{equation*}
\begin{split}
\|\nabla u\|^2_{L^{\infty}(B)}\le\|\nabla u\|^2_{L^{\infty}(\partial B)}
\le C\|\nabla u\|^2_{H^1(\partial B)}
\le C\|u_{\phi r}\|^2_{L^2(\partial B)}+C_1,
\end{split}
\end{equation*}
where we have also used \eqref{3.7} and Proposition \ref{prop4.2}. Also bounding
\begin{equation*}
\begin{split}
\|\nabla u\|^3_{L^6(B)}&\le\|\nabla u\|^2_{L^4(B)}\|\nabla u\|_{L^{\infty}(B)}\\
&\le C\big(\|\nabla^2 u\|_{L^2(B)}\|\nabla u\|_{L^2(B)}+E(u)\big)\|\nabla u\|_{L^{\infty}(B)}
\end{split}
\end{equation*}
via \eqref{A.2}, and again using \eqref{3.7}
(and with similar, but simpler bounds for $J_1$), we arrive at the estimate
\begin{equation*}
\begin{split}
J&\le C\||\nabla^2 u||\nabla u|+|\nabla u|^3\|_{L^2(B)}+C_1\\
&\le C\big(\|\nabla^2 u\|_{L^2(B)}+E(u)\big)\|\nabla u\|_{L^{\infty}(B)}+C_1\\
&\le C\big(1+\|\nabla u_{\phi}\|_{L^2(B)}+E(u_0)\big)
\big(\|u_{\phi r}\|_{L^2(\partial B)}+C_1\big).
\end{split}
\end{equation*}
With Lemma \ref{lemma4.3} and Young's inequality we then have
\begin{equation}\label{4.7}
\begin{split}
\frac{d}{dt}&\big(1+\|\nabla u_{\phi}\|^2_{L^2(B)}\big)+\|u_{\phi r}\|^2_{L^2(S^1)}\\
&\le C\|\nabla u_{\phi}\|_{L^2(B)}\big(\|\nabla u_{\phi}\|_{L^2(B)}+E(u_0)\big)
\big(\|u_{\phi r}\|_{L^2(\partial B)}+C_1\big)\\
&\le\frac12\|u_{\phi r}\|^2_{L^2(\partial B)}
+C(1+\|\nabla u_{\phi}\|^2_{L^2(B)})\big(\|\nabla u_{\phi}\|^2_{L^2(B)}+C_1\big).
\end{split}
\end{equation}
Absorbing the first term on the right on the left hand side of this inequality and dividing by $1+\|\nabla u_{\phi}\|^2_{L^2(B)}$ we obtain
\begin{equation*}
\begin{split}
\frac{d}{dt}&\big(\log\big(1+\|\nabla u_{\phi}\|^2_{L^2(B)}\big)\big)
\le C\|\nabla u_{\phi}\|^2_{L^2(B)}+C_1,
\end{split}
\end{equation*}
and from Proposition \ref{prop4.2} we obtain the bound
\begin{equation*}
\begin{split}
\sup_{0<t<T}&\|\nabla u_{\phi}(t)\|^2_{L^2(B)}\le C_1(1+\|\nabla u_{0,\phi}\|^2_{L^2(B)}).
\end{split}
\end{equation*}
The claim then follows from \eqref{4.7}.
\end{proof}

\subsection{$H^3$-bounds}
The derivation of a-priori $L^2$-bounds for third derivatives of the solution $u$ to the flow \eqref{1.3}, \eqref{1.4} requires special care, which is why we highlight this case.

\begin{proposition}\label{prop4.5}
For any smooth $u_0\in H^{1/2}(S^1;N)$ and any $T<T_0$ there holds
\begin{equation*}
\sup_{0<t<T}\int_B|\nabla u_{\phi\phi}(t)|^2dz
+\int_0^T\int_{\partial B}|u_{\phi\phi r}|^2d\phi\,dt
\le C_2\int_B|\nabla u_{0,\phi\phi}|^2dz+C_2,
\end{equation*}
where we denote as $C_2=C_2(T,R,u_0)>0$ a constant bounded by the terms on the right hand side in the statements of Propositions \ref{prop4.2} and \ref{prop4.4}.
\end{proposition}

\begin{proof}
For $k=2$ by Lemma \ref{lemma4.3} we need to bound the term
\begin{equation*}
\begin{split}
J&=\sum_{1\le j_i\le 3,\, \Sigma_ij_i=4}
\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}\\
&\le C\||\nabla u|^4+|\nabla u|^2|\nabla^2 u|+|\nabla^2 u|^2+|\nabla u||\nabla^3 u|\|_{L^2(B)}
\end{split}
\end{equation*}
and corresponding terms involving at most $3$ derivatives in total, which we will omit.

In dealing with the first term,
by the multiplicative inequality \eqref{A.2} and Sobolev's embedding
$H^2(B)\hookrightarrow L^{\infty}(B)$ we can estimate
\begin{equation*}
\begin{split}
\|\nabla u\|^4_{L^8(B)}&\le\|\nabla u\|^2_{L^4(B)}\|\nabla u\|^2_{L^{\infty}(B)}
\le C\|\nabla u\|_{H^1(B)}\|\nabla u\|_{L^2(B)}\|\nabla u\|^2_{L^{\infty}(B)}\\
&\le C(\|\nabla^2 u\|^2_{L^2(B)}+E(u))\|\nabla u\|^2_{L^{\infty}(B)}
\le C_2\|\nabla u\|^2_{L^{\infty}(B)}\\
&\le C_2(\|\nabla^3 u\|_{L^2(B)}+\|\nabla u\|_{L^2(B)})\|\nabla u\|_{L^{\infty}(B)}
\end{split}
\end{equation*}
with a constant $C_2=C_2(T,R,u_0)>0$ as in the statement of the proposition.
Similarly there holds
\begin{equation*}
\begin{split}
\|\nabla^2& u\|^2_{L^4(B)}\le C\|\nabla^2 u\|_{H^1(B)}\|\nabla^2 u\|_{L^2(B)}\\
&\le\|\nabla^3 u\|_{L^2(B)}\|\nabla^2 u\|_{L^2(B)}+\|\nabla^2 u\|^2_{L^2(B)}
\le C_2(1+\|\nabla^3 u\|_{L^2(B)}).
\end{split}
\end{equation*}
Hence we can also bound
\begin{equation*}
\begin{split}
\||\nabla u|^2&|\nabla^2 u|\|_{L^2(B)}\le\|\nabla u\|^4_{L^8(B)}+\|\nabla^2 u\|^2_{L^4(B)}\\
&\le C_2(1+\|\nabla^3 u\|_{L^2(B)})(1+\|\nabla u\|_{L^{\infty}(B)}).
\end{split}
\end{equation*}
Finally, we estimate
\begin{equation*}
\begin{split}
\||\nabla u||\nabla^3 u|\|_{L^2(B)}
\le\|\nabla^3 u\|_{L^2(B)}\|\nabla u\|_{L^{\infty}(B)}
\end{split}
\end{equation*}
to obtain
\begin{equation*}
J\le C_2(1+\|\nabla^3 u\|_{L^2(B)})(1+\|\nabla u\|_{L^{\infty}(B)}).
\end{equation*}

But with the inequality
\begin{equation*}
\|f\|_{L^{\infty}(B)}\le C\|f\|_{H^1(B)}\big(1+\log^{1/2}(1+\|f\|_{H^2(B)}/\|f\|_{H^1(B)})\big)
\end{equation*}
for $f\in H^2(B)$ due to Brezis-Gallouet \cite{Brezis-Gallouet-1980}
(see also Brezis-Wainger \cite{Brezis-Wainger-1980} for a more general version)
we have
\begin{equation*}
\begin{split}
&\|\nabla u\|^2_{L^{\infty}(B)}
\le C\|\nabla u\|^2_{H^1(B)}\big(1+\log(1+\|\nabla u\|_{H^2(B)}/\|\nabla u\|_{H^1(B)})\big)\\
&\quad\le C_2(1+\log(1+\|\nabla^3 u\|_{L^2(B)})),
\end{split}
\end{equation*}
and Lemma \ref{lemma4.3} yields the differential inequality
\begin{equation*}
\begin{split}
\frac{d}{dt}&\big(\|\nabla\partial^2_{\phi}u\|^2_{L^2(B)}\big)
+\|u_{\phi\phi r}\|^2_{L^2(\partial B)}\\
&\le C_2\|\nabla\partial^2_{\phi}u\|_{L^2(B)}
(1+\|\nabla^3 u\|_{L^2(B)})\big(1+\log(1+\|\nabla^3 u\|_{L^2(B)})\big).
\end{split}
\end{equation*}
Simplifying, and recalling that
$\|\nabla^3 u\|^2_{L^2(B)}\le C\|\nabla\partial^2_{\phi}u\|^2_{L^2(B)}$ by \eqref{4.3},
we then find
\begin{equation*}
\begin{split}
\frac{d}{dt}\big(1+&\|\nabla\partial^2_{\phi}u\|_{L^2(B)}\big)\\
&\le C_2(1+\|\nabla\partial^2_{\phi}u\|_{L^2(B)})\big(1+\log(1+\|\nabla\partial^2_{\phi}u\|_{L^2(B)})\big);
\end{split}
\end{equation*}
that is, we have
\begin{equation*}
\begin{split}
\frac{d}{dt}\big(1+&\log(1+\|\nabla\partial^2_{\phi}u\|_{L^2(B)})\big)
\le C_2\big(1+\log(1+\|\nabla\partial^2_{\phi}u\|_{L^2(B)})\big).
\end{split}
\end{equation*}
Arguing as in the proof of Proposition \ref{prop4.4} we then obtain the claim.
\end{proof}

\subsection{$H^m$-bounds, $m\ge 4$}
In view of Proposition \ref{prop4.5} we can now use induction to prove the following result.

\begin{proposition}\label{prop4.6}
For any $k\ge 3$, any smooth $u_0\in H^{1/2}(S^1;N)$, and any $T<T_0$ there holds
\begin{equation*}
\sup_{0<t<T}\int_B|\nabla\partial^k_{\phi}u(t)|^2dz
+\int_0^T\int_{\partial B}|\partial^k_{\phi}u_r|^2d\phi\,dt
\le C_k\int_B|\nabla\partial^k_{\phi}u_0|^2dz+C_k,
\end{equation*}
where we denote as $C_k=C_k(T,R,u_0)>0$ a constant bounded by the terms on the right hand side in the statement of the proposition for $k-1$.
\end{proposition}

\begin{proof}
By Proposition \ref{prop4.5} the claimed result holds true for $k=2$.
Suppose the claim holds true for some $k_0\ge 2$ and let $k=k_0+1$.
Note that by Sobolev's embedding $H^2(B)\hookrightarrow W^{1,4}\cap C^0(\bar{B})$
and \eqref{4.3} for $0\le t<T$ we then have the uniform bounds
\begin{equation}\label{4.8}
\begin{split}
\|\nabla^{k_0+1}u\|^2_{L^2(B)}&+\|\nabla^{k_0}u\|^2_{L^4(B)}
+\sum_{1\le j\le k_0-1}\|\nabla^{j}u\|^2_{L^{\infty}(B)}\\
&\le C_{k_0}\|\nabla^{k_0+1}u_0\|^2_{L^2(B)}+C_{k_0}\le C_k<\infty
\end{split}
\end{equation}
with a constant of the type $C_k$, as defined above.

By Lemma \ref{lemma4.3} again we only need to bound the term
\begin{equation*}
J=\sum_{1\le j_i\le k+1,\,\Sigma_ij_i\le k+2}\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}.
\end{equation*}
Clearly we have
\begin{equation*}
\begin{split}
J&\le\|\nabla^{k+1}u\|_{L^2(B)}\|\nabla u\|_{L^{\infty}(B)}
+\|\nabla^ku\|_{L^2(B)}\|\nabla u\|^2_{L^{\infty}(B)}+\|\nabla^ku\nabla^2u\|_{L^2(B)}\\
&\qquad+\|\nabla^{k-1}u\nabla^3u\|_{L^2(B)}
+\|\nabla^{k-1}u\nabla^2u\|_{L^2(B)}\|\nabla u\|_{L^{\infty}(B)}+C_k\\
&\le C_k\|\nabla^{k+1}u\|_{L^2(B)}+\|\nabla^ku\nabla^2u\|_{L^2(B)}
+\|\nabla^{k-1}u\nabla^3u\|_{L^2(B)}+C_k.
\end{split}
\end{equation*}

We now distinguish the following cases: If $k-1=k_0\ge 3$ by \eqref{4.8} we can bound
\begin{equation*}
\|\nabla^ku\nabla^2u\|_{L^2(B)}
\le\|\nabla^ku\|_{L^2(B)}\|\nabla^2u\|_{L^{\infty}(B)}
\le C_{k_0}\|\nabla^{k_0+1}u\|^2_{L^2(B)}+C_{k_0}\le C_k
\end{equation*}
as well as
\begin{equation*}
\begin{split}
\|\nabla^{k-1}u\nabla^3u\|_{L^2(B)}\le\|\nabla^{k-1}u\|_{L^4(B)}\|\nabla^3u\|_{L^4(B)}
\le C_{k_0}\|\nabla^{k_0}u\|^2_{L^4(B)}+C_{k_0}\le C_k
\end{split}
\end{equation*}
to obtain the estimate
\begin{equation*}
J\le C_k\|\nabla^{k+1}u\|_{L^2(B)}+C_k.
\end{equation*}

If, on the other hand, $k_0=k-1=2$, by our induction hypothesis \eqref{4.8} we have
\begin{equation*}
\begin{split}
\|\nabla^{k-1}u&\nabla^3u\|_{L^2(B)}=\|\nabla^2u\nabla^ku\|_{L^2(B)}
\le\|\nabla^ku\|_{L^4(B)}\|\nabla^2u\|_{L^4(B)}\\
&\le C_k\|\nabla^ku\|_{H^1(B)}\le C_k\|\nabla^{k+1}u\|_{L^2(B)}+C_k,
\end{split}
\end{equation*}
and we find
\begin{equation*}
J\le C_k\|\nabla^{k+1}u\|_{L^2(B)}+C_k
\end{equation*}
as before.

In any case, inequality \eqref{4.3} and Lemma \ref{lemma4.3} now may be invoked to obtain
\begin{equation*}
\begin{split}
\frac{d}{dt}\big(\|\nabla\partial^k_{\phi}u\|^2_{L^2(B)}\big)
\le C_k\|\nabla\partial^k_{\phi}u\|^2_{L^2(B)}+C_k,
\end{split}
\end{equation*}
and our claim follows.
\end{proof}

\subsection{Local $H^2$-bounds}
The bounds established so far all require the initial data to be sufficiently smooth for the estimate at hand and do not yet allow us to show smoothing of the flow.
For the latter purpose we next prove a second set of ``intermediate'' estimates that in combination with the first set of estimates later will allow boot-strapping.
Moreover, in contrast to the estimates established so far, the following estimates may be localized. This will be important for showing regularity of the flow at blow-up times away from concentration points of the energy on $\partial B$.

For the localized estimates,
fix a point $z_0\in\partial B$ and some radius $0<R_0<1/4$ and for $k\in\mathbbm N$ set
$R_k=2^{-k}R_0$, $\varphi_k=\varphi_{z_0,R_k}$. Set $\varphi_k=1$ for each $k\in\mathbbm N$
for the analogous global bounds.

We first establish the following localized version of Lemma \ref{lemma4.1}.

\begin{lemma}\label{lemma4.7}
With a constant $C>0$ depending only on $N$ there holds
\begin{equation*}
\frac{d}{dt}\big(\int_{\partial B}|u_{\phi}|^2\varphi_1^2\,d\phi\big)
+\int_B|\nabla u_{\phi}|^2\varphi_1^2\,dz
\le C\int_B|\nabla u|^2|u_{\phi}|^2\varphi_1^2\,dz+CR_0^{-2}E(u_0).
\end{equation*}
\end{lemma}

\begin{proof}
Similar to the proof of Lemma \ref{lemma4.1}, we compute
\begin{equation*}
\begin{split}
&\frac12\frac{d}{dt}\big(\int_{\partial B}|u_{\phi}|^2\varphi_1^2\,d\phi\big)
=\int_{\partial B}u_{\phi}\cdot u_{\phi,t}\varphi_1^2\,d\phi
=-\int_{\partial B}\partial_{\phi}(u_{\phi}\varphi_1^2)\cdot u_td\phi\\
&=\int_{\partial B}\partial_{\phi}(u_{\phi}\varphi_1^2)\cdot d\pi_N(u)u_rd\phi
=-\int_{\partial B}\big(u_{\phi}\cdot u_{r\phi}
-u_{\phi}\cdot\partial_{\phi}(\nu(u)\,\nu(u)\cdot u_r)\big)\varphi_1^2d\phi\\
&=-\frac12\int_{\partial B}\partial_r(|u_{\phi}|^2)\varphi_1^2d\phi
-\int_{\partial B}u_{\phi}\cdot d\nu(u)u_{\phi}\,\nu(u)\cdot u_r\varphi_1^2d\phi.
\end{split}
\end{equation*}
With $\Delta|u_{\phi}|^2=2|\nabla u_{\phi}|^2$ we obtain
\begin{equation*}
\frac12\int_{\partial B}\partial_r(|u_{\phi}|^2)\varphi_1^2d\phi
=\int_B|\nabla u_{\phi}|^2\varphi_1^2dz+\int_B\nabla |u_{\phi}|^2\varphi_1\nabla\varphi_1dz,
\end{equation*}
where
\begin{equation*}
\big|\int_B\nabla |u_{\phi}|^2\varphi_1\nabla\varphi_1dz\big|
\le\frac14\int_B|\nabla u_{\phi}|^2\varphi_1^2dz+C\int_B |u_{\phi}|^2|\nabla\varphi_1|^2dz
\end{equation*}
by Young's inequality. Finally, we can bound
\begin{equation*}
\begin{split}
\int_{\partial B}&u_r\cdot\nu(u)\,u_{\phi}\cdot d\nu(u)u_{\phi}\varphi_1^2\,d\phi
=\int_B\nabla u\cdot\nabla\big(\nu(u)\,u_{\phi}\cdot d\nu(u)u_{\phi}\varphi_1^2\big)dz\\
&\le C\int_B\big(|\nabla u_{\phi}||\nabla u||u_{\phi}|+|\nabla u|^2|u_{\phi}|^2\big)\varphi_1^2dz
+C\int_B|\nabla u||\nabla\varphi_1||u_{\phi}|^2\varphi_1dz\\
&\le\frac14\int_B|\nabla u_{\phi}|^2\varphi_1^2dz
+C\int_B|\nabla u|^2|u_{\phi}|^2\varphi_1^2dz+C\int_B|\nabla u|^2|\nabla\varphi_1|^2dz,
\end{split}
\end{equation*}
and our claim follows.
\end{proof}

We need a substitute for the global bound \eqref{4.3}.
For this, we note that the equation \eqref{4.1}
also implies the pointwise bound $|u_{rr}|^2\le2|u_{\phi\phi}|^2/r^4+2|u_r|^2/r^2$;
hence we have
\begin{equation*}
\begin{split}
|\nabla^2u|^2\le C(|\nabla u_{\phi}|^2+2|\nabla u|^2)\ \hbox{ in } B_{R_0}(z_0)
\end{split}
\end{equation*}
with an absolute constant $C>0$, uniformly in $z_0\in\partial B$ and $0<R_0<1/4$.
By induction then, similarly we have
\begin{equation}\label{4.9}
\begin{split}
|\nabla^{k+1}u|^2\le C(|\nabla^k\partial_{\phi}u|^2+|\nabla^ku|^2)
\le C\sum_{j=0}^k|\nabla\partial_{\phi}^ju|^2\ \hbox{ in } B_{R_0}(z_0)
\end{split}
\end{equation}
with an absolute constant $C=C(k)>0$, uniformly in $z_0\in\partial B$ and $0<R_0<1/4$
for any $k\in\mathbbm N$.

Likewise, as a substitute for the global non-concentration condition \eqref{4.4}
we now suppose that $z_0\in\partial B$ is not a concentration point in the sense that for suitably chosen
$\delta>0$ to be determined in the sequel and some $0<R_0<1/4$ as above there holds
\begin{equation}\label{4.10}
\sup_{0<t<T_0}\int_{B_{R_0}(z_0)\cap B}|\nabla u(t)|^2dz<\delta.
\end{equation}
We then obtain the following localized version of Proposition \ref{prop4.2}.

\begin{proposition}\label{prop4.8}
There exist constants $\delta>0$ and $C>0$ independent of $R_0>0$
such that whenever \eqref{4.10} holds then for any $T\le T_0$ we have
\begin{equation*}
\begin{split}
\sup_{0<t<T}\int_{\partial B}|u_{\phi}(t)|^2\varphi_1^2\,d\phi
&+\int_0^T\int_B|\nabla u_{\phi}|^2\varphi_1^2\,dz\,dt\\
&\le2\int_{\partial B}|u_{0,\phi}|^2\varphi_1^2\,d\phi+CTR_0^{-2}E(u_0).
\end{split}
\end{equation*}
\end{proposition}

\begin{proof}
With the help of the inequality \eqref{A.1} in the Appendix we can bound
\begin{equation*}
\begin{split}
\int_B|\nabla u|^4&\varphi_1^2dz
\le C\delta\int_{B_R(z_i)}|\nabla^2u|^2\varphi_1^2dz
+C\delta R_0^{-2}\int_{B_R(z_i)}|\nabla u|^2dz.
\end{split}
\end{equation*}
Thus, for sufficiently small $\delta>0$ our claim follows from Lemma \ref{lemma4.7}.
\end{proof}

The next lemma again prepares for a proposition that later will allow us to obtain higher derivative bounds by induction. Note the differences to Lemma \ref{lemma4.3}.

\begin{lemma}\label{lemma4.9}
For any $k\ge 2$, with a constant $C>0$ depending only on $k$ and $N$,
for the solution $u=u(t)$ to \eqref{1.3}, \eqref{1.4} for any $0<t<T_0$ there holds
\begin{equation*}
\begin{split}
\frac{d}{dt}&\big(\|\partial^k_{\phi}u\varphi_k\|^2_{L^2(\partial B)}\big)
+\|\nabla\partial^k_{\phi}u\varphi_k\|^2_{L^2(B)}\\
&\le C\sum_{1\le j_i\le k,\, \Sigma_ij_i\le 2k+2} \|\Pi_i\nabla^{j_i}u\varphi_k^2\|_{L^1(B)}\\
&\quad+C\sum_{1\le j_i\le k,\,\Sigma_{i\ge 0}j_i\le k+1}
\|\Pi_{i>0}\nabla^{j_i}u\nabla^{j_0}\varphi_k\|^2_{L^2(B)}+CR_0^{-2k}E(u_0).
\end{split}
\end{equation*}
\end{lemma}

\begin{proof}
Fix $k\ge 2$. With $\Delta|\partial^k_{\phi}u|^2=2|\nabla\partial^k_{\phi}u|^2$ we compute
\begin{equation*}
\begin{split}
\frac12&\frac{d}{dt}\big(\|\partial^k_{\phi}u\varphi_k\|^2_{L^2(\partial B)}\big)
=(-1)^k\int_{\partial B}\partial^k_{\phi}(\partial^k_{\phi}u\varphi_k^2)\cdot u_td\phi\\
&=(-1)^{k+1}\int_{\partial B}\partial^k_{\phi}(\partial^k_{\phi}u\varphi_k^2)
\cdot(u_r-\nu(u)\,\nu(u)\cdot u_r)d\phi\\
&=-\frac12\int_{\partial B}\partial_r(|\partial^k_{\phi}u|^2)\varphi_k^2d\phi
+\int_{\partial B}\partial^k_{\phi}u\cdot\partial^k_{\phi}\big(\nu(u)\,\nu(u)\cdot u_r\big)
\varphi_k^2d\phi\\
&=-\int_B|\nabla\partial^k_{\phi}u|^2\varphi_k^2dz
-\int_B\nabla(|\partial^k_{\phi}u|^2)\varphi_k\nabla\varphi_kdz+I,
\end{split}
\end{equation*}
where we split
\begin{equation*}
I=\int_{\partial B}\partial^k_{\phi}u\cdot\partial^k_{\phi}
\big(\nu(u)\,\nu(u)\cdot u_r\big)\varphi_k^2d\phi=\sum_{j=0}^k\Big({k\atop j}\Big)I_j
\end{equation*}
with
\begin{equation*}
\begin{split}
I_j=(\partial^k_{\phi}u\cdot&\partial^j_{\phi}(\nu(u)\,\nu(u))\varphi_k^2,
\partial^{k-j}_{\phi}u_r)_{L^2(\partial B)}\\
&=\big(\nabla\big(\partial^k_{\phi}u\cdot\partial^j_{\phi}(\nu(u)\nu(u))\varphi_k^2\big),
\nabla\partial^{k-j}_{\phi}u\big)_{L^2(B)},\ 0\le j\le k.
\end{split}
\end{equation*}
For $1\le j\le k$ we bound
\begin{equation*}
\begin{split}
|I_j|&\le C\sum_{0\le i\le j}\|\nabla\partial^k_{\phi}u\varphi_k\|_{L^2(B)}
\|\partial^{j-i}_{\phi}\nu(u)\partial^{i}_{\phi}\nu(u)
\nabla\partial^{k-j}_{\phi}u\varphi_k\|_{L^2(B)}\\
&\quad+C\sum_{0\le i\le j}\|\partial^k_{\phi}u\cdot
\nabla\big(\partial^{j-i}_{\phi}\nu(u)\partial^{i}_{\phi}\nu(u)\varphi_k^2\big)\cdot
\nabla\partial^{k-j}_{\phi}u\|_{L^1(B)}.
\end{split}
\end{equation*}
By the chain rule then for $1\le j\le k$ we have
\begin{equation*}
\begin{split}
|I_j|&\le C\sum_{1\le j_i\le k,\, \Sigma_ij_i=k+1}\|\nabla\partial^k_{\phi}u\varphi_k\|_{L^2(B)}
\|\Pi_i\nabla^{j_i}u\varphi_k\|_{L^2(B)}\\
&\quad+C\sum_{1\le j_i\le k,\, \Sigma_ij_i=k+2}
\|\partial^k_{\phi}u\cdot\Pi_i\nabla^{j_i}u\varphi_k^2\|_{L^1(B)}\\
&\quad+C\sum_{1\le j_i\le k,\, \Sigma_ij_i=k+1}
\|\partial^k_{\phi}u\cdot\Pi_i\nabla^{j_i}u\varphi_k\nabla\varphi_k\|_{L^1(B)}.
\end{split}
\end{equation*}
By Cauchy-Schwarz and Young's inequality then we can bound
\begin{equation*}
\begin{split}
&\sum_{1\le j\le k}|I_j|\le\frac14\|\nabla\partial^k_{\phi}u\varphi_k\|^2_{L^2(B)}
+ C\sum_{1\le j_i\le k,\, \Sigma_ij_i=k+1} \|\Pi_i\nabla^{j_i}u\varphi_k\|^2_{L^2(B)}\\
&\qquad+C\sum_{1\le j_i\le k,\,\Sigma_ij_i=2k+2}
\|\Pi_i\nabla^{j_i}u\varphi_k^2\|_{L^1(B)}
+C\|\partial^k_{\phi}u\nabla\varphi_k\|^2_{L^2(B)}\\
&\le\frac14\|\nabla\partial^k_{\phi}u\varphi_k\|^2_{L^2(B)}
+C\sum_{{1\le j_i\le k}\atop{\Sigma_ij_i=2k+2}}\|\Pi_i\nabla^{j_i}u\varphi_k^2\|_{L^1(B)}
+C\|\partial^k_{\phi}u\nabla\varphi_k\|^2_{L^2(B)},
\end{split}
\end{equation*}
as claimed. Finally, with
\begin{equation*}
\nu(u)\cdot u_{\phi r}=(dist_N(u))_{\phi r}-u_r\cdot d\nu(u)u_{\phi}
\end{equation*}
as in the proof of Lemma \ref{lemma4.3}, for $j=0$ we can write
\begin{equation*}
\nu(u)\cdot\partial^k_{\phi}u_r
=\partial^{k-1}_{\phi}\big(\nu(u)\cdot u_{\phi r}\big)+II
=\partial^k_{\phi}(dist_N(u))_r+III,
\end{equation*}
where the terms in $II$ and $III$ involve products of at least two derivatives of orders between $1$ and $k$ of $u$. Thus we have
\begin{equation*}
\begin{split}
I_0&=(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2,
\nu(u)\cdot\partial^k_{\phi}u_r)_{L^2(\partial B)}\\
&=(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2,\partial^k_{\phi}(dist_N(u))_r)_{L^2(\partial B)}
+II_0
\end{split}
\end{equation*}
with a term $II_0$ that can be dealt with in the same way as the terms $I_j$, $1\le j\le k$.

Using the divergence theorem and integrating by parts we can write the leading term as
\begin{equation*}
\begin{split}
\hat{I}_0&
:=(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2,\partial^k_{\phi}(dist_N(u))_r)_{L^2(\partial B)}\\
&=\big(\nabla\big(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2\big),
\nabla\partial^k_{\phi}(dist_N(u))\big)_{L^2(B)}\\
&\qquad+\big(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2,
\Delta\partial^k_{\phi}(dist_N(u))\big)_{L^2(B)}\\
&=\big(\nabla\big(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2\big),
\nabla\partial^k_{\phi}(dist_N(u))\big)_{L^2(B)}\\
&\qquad-\big(\partial_{\phi}\big(\partial^k_{\phi}u\cdot\nu(u)\varphi_k^2\big),
\Delta\partial^{k-1}_{\phi}(dist_N(u))\big)_{L^2(B)}
\end{split}
\end{equation*}
to see that this term may be bounded
\begin{equation*}
\begin{split}
|\hat{I}_0|&\le C\|(|\nabla\partial^k_{\phi}u|+|\partial^k_{\phi}u\nabla u|)\varphi_k
+|\partial^k_{\phi}u\nabla\varphi_k|\|_{L^2(B)}
\|\nabla^{k+1}(dist_N(u))\varphi_k\|_{L^2(B)}.
\end{split}
\end{equation*}
But by elliptic regularity we again have
\begin{equation*}
\begin{split}
\|&\nabla^{k+1}(dist_N(u))\varphi_k\|_{L^2(B)}\\
&\le\|\nabla^{k+1}(dist_N(u)\varphi_k)\|_{L^2(B)}
+C\sum_{1\le j\le k+1}\|\nabla^{k+1-j}(dist_N(u))\nabla^j\varphi_k\|_{L^2(B)}\\
&\le C\|\Delta(dist_N(u)\varphi_k)\|_{H^{k-1}(B)}
+C\sum_{1\le j\le k+1}\|\nabla^{k+1-j}(dist_N(u))\nabla^j\varphi_k\|_{L^2(B)}\\
&\le C\|\Delta(dist_N(u))\varphi_k\|_{H^{k-1}(B)}
+C\sum_{1\le j\le k+1}\|\nabla^{k+1-j}(dist_N(u))\nabla^j\varphi_k\|_{L^2(B)},
\end{split}
\end{equation*}
where from \eqref{3.5} we can bound the first term on the right
\begin{equation*}
\begin{split}
\|\Delta&(dist_N(u))\varphi_k\|_{H^{k-1}(B)}
\le\sum_{0\le j<k}\|\nabla^j\big(\nabla u\cdot d\nu(u)\nabla u\varphi_k\big)\|_{L^2(B)}\\
&\le C\sum_{0\le j_0<k,\,1\le j_i\le k,\,\Sigma_{i\ge 0}j_i\le k+1}
\|\Pi_i\nabla^{j_i}u\nabla^{j_0}\varphi_k\|_{L^2(B)}.
\end{split}
\end{equation*}
Moreover, using that $dist_N(u)=0$ on $\partial B$, with the help of Poincar\'e's inequality we find the bound
\begin{equation*}
\begin{split}
\|dist_N(u)\nabla^{k+1}\varphi_k\|^2_{L^2(B)}
\le CR_k^{-2k}\|\nabla(dist_N(u))\|^2_{L^2(B_{R_k}(z_0))}\le CR_0^{-2k}E(u).
\end{split}
\end{equation*}
The remaining terms for $1\le j\le k$ can be estimated
\begin{equation*}
\begin{split}
\|\nabla^{k+1-j}(dist_N(u))\nabla^j\varphi_k\|_{L^2(B)}
\le C\sum_{\Sigma_ij_i=k+1-j}\|\Pi_i\nabla^{j_i}u\nabla^j\varphi_k\|_{L^2(B)}
\end{split}
\end{equation*}
via the chain rule. Thus, finally, we obtain the bound
\begin{equation*}
\begin{split}
\|\nabla^{k+1}&(dist_N(u))\varphi_k\|_{L^2(B)}\\
&\le C\sum_{1\le j_0,j_i\le k,\,\Sigma_{i\ge 0}j_i\le k+1}
\|\Pi_{i>0}\nabla^{j_i}u\nabla^{j_0}\varphi_k\|_{L^2(B)}+CR_0^{-2k}E(u_0).
\end{split}
\end{equation*}
By Cauchy-Schwarz and Young's inequality thus we can bound
\begin{equation*}
\begin{split}
|\hat{I}_0|&\le\frac14\|\nabla\partial^k_{\phi}u\varphi_k\|^2_{L^2(B)}
+C\|\partial^k_{\phi}u\nabla u\varphi_k\|^2_{L^2(B)}\\
&\qquad+C\sum_{1\le j_i\le k,\,\Sigma_ij_i\le k+1}
\|\Pi_{i>0}\nabla^{j_i}u\nabla^{j_0}\varphi_k\|^2_{L^2(B)}+CR_0^{-2k}E(u_0),
\end{split}
\end{equation*}
and together with our above estimate for the terms $I_j$, $j\ge 1$, our claim follows.
\end{proof}

\begin{proposition}\label{prop4.10}
There exists a constant $\delta>0$ independent of $R_0>0$
such that whenever \eqref{4.10} holds then for any $T\le T_0$ with a constant
$C_2=C_2(T,R,u_0)>0$ bounded by the terms on the right hand side in the statement of Proposition \ref{prop4.8} there holds the estimate
\begin{equation*}
\begin{split}
\sup_{0<t<T}\int_{\partial B}|u_{\phi\phi}(t)|^2\varphi_2^2\,d\phi
+\int_0^T\int_B|\nabla&u_{\phi\phi}|^2\varphi_2^2\,dz\,dt\\
&\le C_2\int_{\partial B}|u_{0,\phi\phi}|^2\varphi_2^2\,d\phi+C_2.
\end{split}
\end{equation*}
\end{proposition}

\begin{proof}
For $k=2$ with the help of Young's inequality we can bound
\begin{equation*}
\begin{split}
J_1&=\sum_{1\le j_i\le k,\, \Sigma_ij_i\le 2k+2}\|\Pi_i\nabla^{j_i}u\varphi_k^2\|_{L^1(B)}\\
&\quad\le C\|(|\nabla^2u|^3+|\nabla^2u|^2|\nabla u|^2+|\nabla^2u||\nabla u|^4
+|\nabla u|^6+1)\varphi_2^2\|_{L^1(B)}\\
&\quad\le C\|(|\nabla^2u|^3+|\nabla u|^6+1)\varphi_2^2\|_{L^1(B)},
\end{split}
\end{equation*}
and
\begin{equation*}
\begin{split}
J_2&=\sum_{1\le j_0,j_i\le k,\,\Sigma_{i\ge 0}j_i\le k+1}
\|\Pi_{i>0}\nabla^{j_i}u\nabla^{j_0}\varphi_2\|^2_{L^2(B)}\\
&\le C\|(|\nabla^2u|^2+|\nabla u|^4+1)|\nabla\varphi_2|^2
+(|\nabla u|^2+1)|\nabla^2\varphi_2|^2\|_{L^1(B)}.
\end{split}
\end{equation*}

Observing that $\varphi_1=1$ on the support of $\varphi_2$, by \eqref{A.2} for the first term in $J_1$ we have
\begin{equation*}
\begin{split}
\||\nabla^2&u|^3\varphi_2^2\|_{L^1(B)}\le\|\nabla^2 u\varphi_2\|^2_{L^4(B)}
\|\nabla^2 u\varphi_1\|_{L^2(B)}\\
&\le C\|\nabla^2 u\varphi_2\|_{H^1(B)}\|\nabla^2 u\varphi_2\|_{L^2(B)}
\|\nabla^2 u\varphi_1\|_{L^2(B)}\\
&\le C(\|\nabla^3 u\varphi_2\|_{L^2(B)}
+\|\nabla^2 u\varphi_1\|_{L^2(B)})
\|\nabla^2 u\varphi_2\|_{L^2(B)}\|\nabla^2 u\varphi_1\|_{L^2(B)}.
\end{split}
\end{equation*}
Moreover, arguing as in \eqref{A.1} for the function $|\nabla u|^6\varphi_2^2$ in place of
$|v|^4\varphi^2$, we can bound
\begin{equation*}
\begin{split}
\int_B|\nabla&u|^6\varphi_2^2dz
\le C\Big(\int_B\big(|\nabla^2 u||\nabla u|^2\varphi_2
+|\nabla u|^3|\nabla\varphi_2|\big)dz\Big)^2\\
&\le C\Big(\int_B|\nabla^2 u|^3\varphi_2^2dz\Big)^{2/3}
\Big(\int_B|\nabla u|^3\varphi_2^{1/2}dz\Big)^{4/3}
+C\Big(\int_B|\nabla u|^3|\nabla\varphi_2|dz\Big)^2,
\end{split}
\end{equation*}
where by H\"older's inequality we have
\begin{equation*}
\begin{split}
\int_B&|\nabla u|^3\varphi_2^{1/2}dz
\le\Big(\int_B|\nabla u|^6\varphi_2^2dz\Big)^{1/4}
\Big(\int_B|\nabla u|^2\varphi_1^2dz\Big)^{3/4}
\end{split}
\end{equation*}
so that with Young's inequality we obtain
\begin{equation*}
\begin{split}
\int_B|\nabla&u|^6\varphi_2^2dz
\le C\delta\Big(\int_B|\nabla^2 u|^3\varphi_2^2dz\Big)^{2/3}
\Big(\int_B|\nabla u|^6\varphi_2^2dz\Big)^{1/3}\\
&\qquad+C\Big(\int_B|\nabla u|^3|\nabla\varphi_2|dz\Big)^2\\
&\le\frac12\int_B|\nabla u|^6\varphi_2^2dz
+C\int_B|\nabla^2 u|^3\varphi_2^2dz
+C\Big(\int_B|\nabla u|^3|\nabla\varphi_2|dz\Big)^2.
\end{split}
\end{equation*}
With Young's inequality for suitable $\varepsilon>0$, and using \eqref{4.9},
we then can bound
\begin{equation*}
\begin{split}
J_1&\le C\|(|\nabla^2u|^3+1)\varphi_2^2\|_{L^1(B)}
+C\||\nabla u|^3|\nabla\varphi_2|\|^2_{L^1(B)}
\le\varepsilon\|\nabla^3 u\varphi_2\|^2_{L^2(B)}\\
&\qquad\qquad+C(1+\|\nabla^2 u\varphi_2\|^2_{L^2(B)})
\|\nabla^2 u\varphi_1\|^2_{L^2(B)}+C\||\nabla u|^3|\nabla\varphi_2|\|^2_{L^1(B)}\\
&\le\frac12\|\nabla\partial^2_{\phi}u\varphi_2\|^2_{L^2(B)}
+C(1+\|\nabla\partial_{\phi}u\varphi_2\|^2_{L^2(B)})
\|\nabla\partial_{\phi}u\varphi_1\|^2_{L^2(B)}+C_1,
\end{split}
\end{equation*}
where we also have estimated
\begin{equation*}
\begin{split}
\||\nabla u|^3&|\nabla\varphi_2|\|^2_{L^1(B)}
\le C\|\nabla u\varphi_1\|^4_{L^4(B)}\|\nabla u\varphi_1\|^2_{L^2(B)}\\
&\le C\big(\|\nabla^2u\varphi_1\|^2_{L^2(B)}+E(u)\big)\|\nabla u\varphi_1\|^4_{L^2(B)}
\le C\|\nabla\partial_{\phi}u\varphi_1\|^2_{L^2(B)}+C.
\end{split}
\end{equation*}

Similarly, with \eqref{A.2} we have
\begin{equation*}
J_2\le C\|\nabla^2 u\varphi_1\|^2_{L^2(B)}+C.
\end{equation*}
Thus, from Lemma \ref{lemma4.9} we obtain
\begin{equation}\label{4.11}
\begin{split}
\frac{d}{dt}\big(\|\partial^2_{\phi}&u\varphi_2\|^2_{L^2(\partial B)}\big)
+\frac12\|\nabla\partial^2_{\phi}u\varphi_2\|^2_{L^2(B)}\\
&\le C(1+\|\nabla\partial_{\phi}u\varphi_2\|^2_{L^2(B)})
\|\nabla\partial_{\phi}u\varphi_1\|^2_{L^2(B)}+C.
\end{split}
\end{equation}

Denote as $C_1=C_1(T,R,u_0)>0$ a constant bounded by the terms on the right hand side in the statement of Proposition \ref{prop4.8}.
By elliptic regularity, using that $|\Delta(u\varphi_2)|\le 2|\nabla u\nabla\varphi_2|+C$
we can bound
\begin{equation*}
\begin{split}
\|\nabla^2&u\varphi_2\|^2_{L^2(B)}
\le\|u\varphi_2\|^2_{H^2(B)}+C\|\nabla u\nabla\varphi_2\|^2_{L^2(B)}+C\\
&\le C\|u\varphi_2\|^2_{H^2(\partial B)}+\|\Delta(u\varphi_2)\|^2_{L^2(B)}
+C\|\nabla u\nabla\varphi_2\|^2_{L^2(B)}+C\\
&\le C\|\partial^2_{\phi}u\varphi_2\|^2_{L^2(\partial B)}+CE(u)+C_1.
\end{split}
\end{equation*}
From \eqref{4.11} we then obtain the differential inequality
\begin{equation*}
\begin{split}
\frac{d}{dt}\big(1+\|\partial^2_{\phi}&u\varphi_2\|^2_{L^2(\partial B)}\big)
\le C(1+\|\partial^2_{\phi}u\varphi_2\|^2_{L^2(\partial B)})
\|\nabla\partial_{\phi}u\varphi_1\|^2_{L^2(B)}+C_1;
\end{split}
\end{equation*}
that is,
\begin{equation*}
\begin{split}
\frac{d}{dt}\Big(\log\big(1+\|\partial^2_{\phi}&u\varphi_2\|^2_{L^2(\partial B)}\big)\Big)
\le C\|\nabla\partial_{\phi}u\varphi_1\|^2_{L^2(B)}+C_1,
\end{split}
\end{equation*}
and the right hand side is integrable in time by Proposition \ref{prop4.8}.
The claim follows.
\end{proof}

We continue by induction.

\begin{proposition}\label{prop4.11}
There exists a constant $\delta>0$ independent of $R_0>0$ with the following property.
Whenever \eqref{4.10} holds, then for any $k\ge 3$, any smooth $u_0\in H^{1/2}(S^1;N)$,
and any $T<T_0$, there holds
\begin{equation*}
\sup_{0<t<T}\int_{\partial B}|\partial^k_{\phi}u(t)|^2\varphi_k^2d\phi
+\int_0^T\int_B|\nabla\partial^k_{\phi}u|^2\varphi_k^2dz\,dt
\le C_k\int_{\partial B}|\partial^k_{\phi}u_0|^2\varphi_k^2d\phi+C_k,
\end{equation*}
where we denote as $C_k=C_k(T,R,u_0)>0$ a constant bounded by the terms on the right hand side in the statement of the proposition for $k-1$.
\end{proposition}

\begin{proof}
By Proposition \ref{prop4.10} the claimed result holds true for $k=2$.
Suppose the claim holds true for some $k_0\ge 2$ and let $k=k_0+1$.
Note that by elliptic regularity, as in the proof of Proposition \ref{prop4.10}
we can bound
\begin{equation*}
\begin{split}
\|\nabla^k&u\varphi_k\|^2_{L^2(B)}
\le\|u\varphi_k\|^2_{H^k(B)}+C\sum_{j<k}\|\nabla^ju\nabla^{k-j}\varphi_k\|^2_{L^2(B)}\\
&\le C\|u\varphi_k\|^2_{H^k(\partial B)}+C\|\Delta(u\varphi_k)\|^2_{H^{k-2}(B)}
+C\sum_{j<k}\|\nabla^ju\nabla^{k-j}\varphi_k\|^2_{L^2(B)}\\
&\le C\|\partial^k_{\phi}u\varphi_k\|^2_{L^2(\partial B)}
+C\sum_{j<k}\|\nabla^ju\nabla^{k-j}\varphi_k\|^2_{L^2(B)}+C_k.
\end{split}
\end{equation*}
By induction hypothesis and Sobolev's embedding
$H^2(B)\hookrightarrow W^{1,4}\cap C^0(\bar{B})$ for $0\le t<T$ we then have the uniform bounds
\begin{equation*}
\begin{split}
\|\nabla^{k_0}u\varphi_{k_0}\|^2_{L^2(B)}&+\|\nabla^{k_0-1}u\varphi_{k_0}\|^2_{L^4(B)}
+\sum_{j=1}^{k_0-2}\|\nabla^{j}u\varphi_{k_0}\|^2_{L^{\infty}(B)}\le C_k,
\end{split}
\end{equation*}
and it follows that
\begin{equation*}
\begin{split}
\|\nabla^k&u\varphi_k\|^2_{L^2(B)}+\|\nabla^{k_0}u\varphi_k\|^2_{L^4(B)}
+\|\nabla^{k_0-1}u\varphi_k\|^2_{L^{\infty}(B)}
\le C\|\partial^k_{\phi}u\varphi_k\|^2_{L^2(\partial B)}+C_k.
\end{split}
\end{equation*}

Again let
\begin{equation*}
\begin{split}
J_1&:=\sum_{1\le j_i\le k,\, \Sigma_ij_i=2k+2}
\|\Pi_i\nabla^{j_i}u\varphi_k^2\|_{L^1(B)}\\
&\le\|\big(|\nabla^ku|^2(|\nabla^2u|+|\nabla u|^2)
+|\nabla^ku||\nabla^{k_0}u||\nabla^3u|+\dots+|\nabla u|^{2k+2}\big)
\varphi_k^2\|_{L^1(B)},
\end{split}
\end{equation*}
and set
\begin{equation*}
\begin{split}
J_2&=\sum_{1\le j_0,j_i\le k,\,\Sigma_{i\ge 0}j_i\le k+1}
\|\Pi_{i>0}\nabla^{j_i}u\nabla^{j_0}\varphi_k\|^2_{L^2(B)}.
\end{split}
\end{equation*}

Suppose $k_0=2$. Recalling that $\varphi_k=\varphi_k\varphi_{k_0}$,
we can bound the listed terms
\begin{equation*}
\begin{split}
\||\nabla^3&u|^2(|\nabla^2u|+|\nabla u|^2)\varphi_3^2\|_{L^1(B)}\\
&\le\|\nabla^3u\varphi_3\|^2_{L^4(B)}(\|\nabla^2u\varphi_2\|_{L^2(B)}
+\|\nabla u\varphi_2\|^2_{L^4(B)})\\
&\le C_3\|\nabla\partial^3_{\phi}u\varphi_3\|_{L^2(B)}\|\nabla^3u\varphi_3\|_{L^2(B)}
+ C_3\|\nabla^3u\varphi_2\|^2_{L^2(B)}+C_3\\
&\le C_3\|\nabla\partial^3_{\phi}u\varphi_3\|_{L^2(B)}
\|\partial^3_{\phi}u\varphi_3\|_{L^2(\partial B)}
+ C_3\|\nabla\partial^2_{\phi}u\varphi_2\|^2_{L^2(B)}+C_3\\
&\le\varepsilon\|\nabla\partial^3_{\phi}u\varphi_3\|^2_{L^2(B)}
+C_3\|\partial^3_{\phi}u\varphi_3\|^2_{L^2(\partial B)}
+ C_3\|\nabla\partial^2_{\phi}u\varphi_2\|^2_{L^2(B)}+C_3,
\end{split}
\end{equation*}
and
\begin{equation*}
\begin{split}
\||\nabla u|^8\varphi_3^2\|_{L^1(B)}
&\le\|\nabla u\varphi_3\|^2_{L^{\infty}(B)}\|\nabla u\varphi_2\|^6_{L^6(B)}\\
&\le C_3\|\partial^3_{\phi}u\varphi_3\|^2_{L^2(\partial B)}+C_3,
\end{split}
\end{equation*}
respectively.
Here we also have used \eqref{A.1}, \eqref{A.2} to bound
\begin{equation*}
\begin{split}
\|\nabla u&\varphi_2\|^3_{L^6(B)}\le\|\nabla(|\nabla u|^3\varphi^3_2)\|_{L^1(B)}\\
&\le C\|(|\nabla^2u|\varphi_2
+|\nabla u||\nabla\varphi_2|)|\nabla u|^2\varphi^2_2\|_{L^1(B)}\\
&\le C(\|\nabla^2u\varphi_2\|_{L^2(B)}+\|\nabla u\nabla\varphi_2\|_{L^2(B)})
\|\nabla u\varphi_2\|^2_{L^4(B)}\\
&\le C(\|\nabla^2u\varphi_2\|_{L^2(B)}+\|\nabla u\nabla\varphi_2\|_{L^2(B)})^2
\|\nabla u\varphi_2\|_{L^2(B)}\le C_3.
\end{split}
\end{equation*}
Similarly, we can bound the remaining terms and the terms in $J_2$ to obtain
\begin{equation*}
\begin{split}
\frac{d}{dt}\big(\|\partial^3_{\phi}&u\varphi_3\|^2_{L^2(\partial B)}\big)
+\frac12\|\nabla\partial^3_{\phi}u\varphi_3\|^2_{L^2(B)}\\
&\le C_3(1+\|\partial^3_{\phi}u\varphi_3\|^2_{L^2(\partial B)})
(1+\|\nabla\partial^2_{\phi}u\varphi_2\|^2_{L^2(B)})+C_3
\end{split}
\end{equation*}
from Lemma \ref{lemma4.9} and then
\begin{equation*}
\begin{split}
\frac{d}{dt}\Big(\log\big(1+\|\partial^3_{\phi}&u\varphi_3\|^2_{L^2(\partial B)}\big)\Big)
\le C_3(1+\|\nabla\partial^2_{\phi}u\varphi_2\|^2_{L^2(B)}),
\end{split}
\end{equation*}
where the right hand side is integrable in time by Proposition \ref{prop4.10}.
The claim for $k=3$ thus follows.

For $k\ge 4$ the analysis is similar (but simpler) and may be left to the reader.
\end{proof}

\section{Local existence}\label{Local existence}
In order to show local existence we approximate the flow equation \eqref{1.3}
by the equation
\begin{equation}\label{5.1}
u_t=-(\varepsilon+d\pi_N(u)) u_r\hbox{ on } \partial B,
\end{equation}
where $\varepsilon>0$ and where we smoothly extend the nearest-neighbor projection $\pi_N$,
originally defined only in the $\rho$-neighborhood $N_{\rho}$ of $N$, to the whole ambient $\mathbbm R^n$.
Our aim then is to show that for given smooth initial data $u_0$
the evolution problem \eqref{5.1}, \eqref{1.4} admits a smooth solution $u_{\varepsilon}$
which remains uniformly smoothly bounded on a uniform time interval as
$\varepsilon\downarrow 0$.
Fixing some $0<\varepsilon<1/2$, we show existence for the problem
\eqref{5.1} with data \eqref{1.4} by means of a fixed-point argument.

To set up the argument, fix smooth initial data $u_0\colon S^1\to N$
with harmonic extension $u_0\in C^{\infty}(\bar{B};\mathbbm R^n)$ and some $k\ge 2$.
For suitable $T>0$ to be determined let
\begin{equation*}
X=L^{\infty}\big([0,T];H^{k+1}(B;\mathbbm R^n)\big)\cap H^1(S^1\times [0,T];\mathbbm R^n)
\end{equation*}
and set
\begin{equation*}
\begin{split}
V=\{v & \in X;\;v(0)=u_0,\
\Delta v(t)=0\hbox{ in } B \hbox{ for } 0\le t\le T,\\
&\|v\|^2_X=\sup_{0\le t\le T}\|v(t)\|^2_{H^{k+1}(B)}
+\int_0^T\int_{S^1}|v_t|^2d\phi\,dt\le 4R_0^2\},
\end{split}
\end{equation*}
where $R_0=\|u_0\|_{H^{k+1}(B)}$.
We endow the space $V$ with the metric derived from the semi-norm
\begin{equation*}
|v|^2_X=\sup_{0\le t\le T}\|\nabla v(t)\|^2_{L^2(B)}+
\int_0^T\int_{S^1}|v_t|^2d\phi\,dt.
\end{equation*}
Note that this metric is positive definite on $V$ in view of the initial condition that we impose.

\begin{lemma}\label{lemma5.1}
$V$ is a complete metric space.
\end{lemma}

\begin{proof}
Let $(v_m)_{m\in\mathbbm N}\subset V$ with $|v_l-v_m|_X\to 0$ ($l,m\to\infty$).
By the theorem of Banach-Alaoglu a subsequence $v_m\rightharpoondown v$
weakly-$*$ in $L^{\infty}\big([0,T];H^{k+1}(B)\big)$
with $v_{m,t}\to v_t$ weakly in $L^2([0,T]\times S^1)$,
and by weak lower semi-continuity of the norm there holds
\begin{equation*}
\|v\|^2_X\le\limsup_{m\to\infty}\|v_m\|^2_X\le 4R_0^2.
\end{equation*}
Moreover, we have $\Delta v(t)=0$ for all $0\le t\le T$ and $v(0)=u_0$ by compactness of the trace operator $H^1(S^1\times [0,T])\ni u\mapsto u(0)\in L^2(S^1)$.
Hence $v\in V$.

Moreover, we have
\begin{equation*}
|v_l-v|_X\le\limsup_{m\to\infty}|v_l-v_m|_X\to 0\ \hbox{ as }l\to\infty.
\end{equation*}
\end{proof}

\begin{lemma}\label{lemma5.2}
There is $T_2>0$ such that for any $T\le T_2$, any $v\in V$ there is a solution
$u=\Phi(v)\in V$ of the equation
\begin{equation}\label{5.2}
u_t=-(\varepsilon+d\pi_N(v)) u_r\hbox{ on } \partial B\times[0,T_2[,
\end{equation}
satisfying \eqref{1.4}.
\end{lemma}

\begin{proof}
For $v\in V$ we construct a solution $u=\Phi(v)\in X$ of \eqref{5.2} via Galerkin approximation. For this let $(\varphi_l)_{l\in\mathbbm N_0}$ be Steklov eigenfunctions of the Laplacian, satisfying
\begin{equation*}
\Delta\varphi_l=0 \hbox{ in } B
\end{equation*}
with boundary condition
\begin{equation*}
\partial_r\varphi_l=\lambda_l\varphi_l \hbox{ on } \partial B,\ l\in\mathbbm N_0.
\end{equation*}
Note that the Steklov eigenvalues are given by
$\lambda_0=0$ and $\lambda_{2l-1}=\lambda_{2l}=l$, $l\in\mathbbm N$. In fact,
we may choose $\varphi_0\equiv 1/\sqrt{2\pi}$ and
\begin{equation}\label{5.3}
\varphi_{2l-1}(re^{i\theta})=\frac{1}{\sqrt{\pi}}r^l\sin(l\theta),\
\varphi_{2l}(re^{i\theta})=\frac{1}{\sqrt{\pi}}r^l\cos(l\theta),\ l\in\mathbbm N,
\end{equation}
to obtain an orthonormal basis for $L^2(S^1)$ consisting of these functions.
Given $m\in\mathbbm N$ then let $u^{(m)}(t,z)=\sum_{l=0}^ma^{(m)}_l(t)\varphi_l(z)$ solve the system of equations
\begin{equation}\label{5.4}
\begin{split}
\partial_ta^{(m)}_l=(\varphi_l,&u^{(m)}_t)_{L^2(S^1)}
=-\big(\varphi_l,(\varepsilon+d\pi_N(v))u^{(m)}_r\big)_{L^2(S^1)}\\
&=-\sum_{j=0}^ma^{(m)}_j\lambda_j
\big(\varphi_l,(\varepsilon+d\pi_N(v))\varphi_j\big)_{L^2(S^1)},\ 0\le l\le m.
\end{split}
\end{equation}
Since for any $m\in\mathbbm N$ the coefficients
$\lambda_j\big(\varphi_l,(\varepsilon+d\pi_N(v))\varphi_j\big)_{L^2(S^1)}$
of this system are uniformly bounded for any $v\in V$, for any $m\in\mathbbm N$
there exists a unique global solution
$a^{(m)}=(a^{(m)}_l)_{0\le l\le m}$ of \eqref{5.4} with initial data
$a^{(m)}_l(0)=a_{l0}=(u_0,\varphi_l)_{L^2(S^1)}$, $0\le l\le m$.

Note that for any $m\in\mathbbm N$ and any $j\in\mathbbm N_0$ the function
\begin{equation*}
\partial^{2j}_{\phi}(ru^{(m)}_r)\in \mathrm{span}\{\varphi_l;\;0\le l\le m\},
\end{equation*}
and $\partial^{2j}_{\phi}u^{(m)}$ is harmonic. In particular, for $j=0$ we obtain
\begin{equation}\label{5.5}
\begin{split}
\frac12\frac{d}{dt}\big(\|\nabla&u^{(m)}\|^2_{L^2(B)}\big)
=\int_B\nabla u^{(m)}\nabla u^{(m)}_t\;dz=(u^{(m)}_r,u^{(m)}_t)_{L^2(S^1)}\\
&=-(u^{(m)}_r,(\varepsilon+d\pi_N(v)) u^{(m)}_r)_{L^2(S^1)}\\
&=-\varepsilon\|u^{(m)}_r\|^2_{L^2(S^1)}-\|d\pi_N(v)u^{(m)}_r\|^2_{L^2(S^1)}\\
&\le-\frac12\|u^{(m)}_t\|^2_{L^2(S^1)}\le 0,
\end{split}
\end{equation}
and we find the uniform $H^1$-bound
\begin{equation}\label{5.6}
\begin{split}
\sup_{t\ge 0}&\|\nabla u^{(m)}(t)\|^2_{L^2(B)}
+\varepsilon\|u^{(m)}_r\|^2_{L^2([0,\infty[\times S^1)}
+\|u^{(m)}_t\|^2_{L^2([0,\infty[\times S^1)}\\
&\le 2\|\nabla u^{(m)}(0)\|^2_{L^2(B)}\le 2\|\nabla u_0\|^2_{L^2(B)}\le 2R_0^2.
\end{split}
\end{equation}

Moreover, for $j=k\in\mathbbm N$ as in the definition of $X$ upon integrating by parts we find
\begin{equation}\label{5.7}
\begin{split}
\frac12&\frac{d}{dt}\big(\|\nabla\partial^k_{\phi}u^{(m)}\|^2_{L^2(B)}\big)
=(-1)^k\int_B\nabla\partial^{2k}_{\phi}u^{(m)}\nabla u^{(m)}_t\;dz\\
&=(-1)^k(\partial^{2k}_{\phi}u^{(m)}_r,u^{(m)}_t)_{L^2(S^1)}\\
&=(-1)^{k+1}(\partial^{2k}_{\phi}u^{(m)}_r,
(\varepsilon+d\pi_N(v))u^{(m)}_r)_{L^2(S^1)}\\
&=-\varepsilon\|\partial^k_{\phi}u^{(m)}_r\|^2_{L^2(S^1)}
-\|d\pi_N(v)\partial^k_{\phi}u^{(m)}_r\|^2_{L^2(S^1)}+I,
\end{split}
\end{equation}
where $I=\sum_{j=1}^k\Big({k\atop j}\Big)I_j$ with
\begin{equation*}
I_j=-(\partial^k_{\phi}u^{(m)}_r,\partial^j_{\phi}(d\pi_N(v))
\partial^{k-j}_{\phi}u^{(m)}_r)_{L^2(S^1)}
\end{equation*}
similar to the proof of Lemma \ref{lemma4.3}. However, now we simply bound
\begin{equation*}
|I_j|\le C\sum_{\Sigma_ij_i=j}\|\partial^k_{\phi}u^{(m)}_r\|_{L^2(S^1)}
\|\Pi_i\partial^{j_i}_{\phi}v\partial^{k-j}_{\phi}u^{(m)}_r\|_{L^2(S^1)}, \ 1\le j\le k.
\end{equation*}

Note that by compactness of Sobolev's embedding $H^1(S^1)\hookrightarrow L^{\infty}(S^1)$
and Ehrling's lemma for any number $1\le j\le k$, any $\delta>0$ we can bound
\begin{equation*}
\begin{split}
\|\partial^{k-j}_{\phi}&u^{(m)}_r\|_{L^{\infty}(S^1)}
\le\delta\|\partial^{k-j+1}_{\phi}u^{(m)}_r\|_{L^2(S^1)}
+C(\delta)\|\partial^{k-j}_{\phi}u^{(m)}_r\|_{L^2(S^1)}\\
&\le2\delta\|\partial^k_{\phi}u^{(m)}_r\|_{L^2(S^1)}
+C(\delta)\|u^{(m)}_r\|_{L^2(S^1)}.
\end{split}
\end{equation*}
On the other hand, for any $v\in V$ by the trace theorem we have
\begin{equation*}
\|\partial^k_{\phi}v\|_{L^2(S^1)}\le C\|\partial^k_{\phi}v\|_{H^1(B)}
\le C\|v\|_{H^{k+1}(B)}\le CR_0
\end{equation*}
and we therefore also can bound
\begin{equation*}
\|\partial^j_{\phi}v\|_{L^{\infty}(S^1)}
\le C\|\partial^k_{\phi}v\|_{L^2(S^1)}+\|\partial^j_{\phi}v\|_{L^2(S^1)}
\le C\|v\|_{H^{k+1}(B)}\le CR_0
\end{equation*}
for any $1\le j<k$.

Thus, for sufficiently small $\delta>0$ with a constant $C>0$ depending on
$\varepsilon>0$ and $R_0$ there holds
\begin{equation*}
|I|\le\varepsilon/2 \|\partial^k_{\phi}u^{(m)}_r\|^2_{L^2(S^1)}
+C\|u^{(m)}_r\|^2_{L^2(S^1)}
\end{equation*}
and from \eqref{5.7} with the help of \eqref{3.7} we obtain the inequality
\begin{equation*}
\begin{split}
\frac{d}{dt}&\big(\|\nabla\partial^k_{\phi}u^{(m)}\|^2_{L^2(B)}\big)
\le C\|u^{(m)}_r\|^2_{L^2(S^1)}=C\|u^{(m)}_{\phi}\|^2_{L^2(S^1)}
\le C\|u^{(m)}_{\phi}\|^2_{H^1(B)}\\
&\le C\|\nabla\partial^k_{\phi}u^{(m)}\|^2_{L^2(B)}+C\|\nabla u^{(m)}\|^2_{L^2(B)}
\le C(1+\|\nabla\partial^k_{\phi}u^{(m)}\|^2_{L^2(B)}),
\end{split}
\end{equation*}
where we recall \eqref{5.6} for the last conclusion.

It follows that for suitably small $T>0$ there holds $\|u^{(m)}\|^2_X\le 4R_0^2$
for all $m\in\mathbbm N$. Thus, there is a sequence $m\to\infty$ such that
$u^{(m)}\rightharpoondown u$ weakly-$*$ in $L^{\infty}([0,T]; H^{k+1}(B))$
with $u^{(m)}_t\rightharpoondown u_t$ weakly in $L^2([0,T]\times S^1)$,
where $u=:\Phi(v)\in V$ solves equation \eqref{5.2}.
\end{proof}

\begin{lemma}\label{lemma5.3}
There is $T>0$ such that for $v_1,v_2\in V$ there holds
\begin{equation*}
|\Phi(v_1)-\Phi(v_2)|_X\le\frac12|v_1-v_2|_X.
\end{equation*}
\end{lemma}

\begin{proof}
Let $T_2>0$ be as determined in Lemma \ref{lemma5.2} and fix some $0<T\le T_2$.
For $v_1,v_2\in V$ then we have $u_i=:\Phi(v_i)\in V$, $i=1,2$. Set
$w=u_1-u_2$, $v=v_1-v_2$, and compute
\begin{equation}\label{5.8}
w_t=-(\varepsilon+d\pi_N(v_1))w_r
-(d\pi_N(v_1)-d\pi_N(v_2))u_{2,r}
\hbox{ on } \partial B=S^1.
\end{equation}
Multiplying with $w_r$ and integrating we obtain
\begin{equation*}
\begin{split}
\frac12&\frac{d}{dt}\big(\|\nabla w\|^2_{L^2(B)}\big)
=\int_B\nabla w\nabla w_t\;dx=(w_r,w_t)_{L^2(S^1)}
=-\varepsilon\|w_r\|^2_{L^2(S^1)}\\
&-\|d\pi_N(v_1)w_r\|^2_{L^2(S^1)}
-(w_r,(d\pi_N(v_1)-d\pi_N(v_2))u_{2,r})_{L^2(S^1)},
\end{split}
\end{equation*}
where with $\|u_{2,r}\|_{L^{\infty}(S^1)}\le C\|u_2\|_{H^3(B)}\le CR_0$ we can bound
\begin{equation*}
\begin{split}
|(w_r,&(d\pi_N(v_1)-d\pi_N(v_2))u_{2,r})_{L^2(S^1)}|
\le C\|w_r\|_{L^2(S^1)}\|v\|_{L^2(S^1)}\|u_{2,r}\|_{L^{\infty}(S^1)}\\
&\le C\|w_r\|_{L^2(S^1)}\|v\|_{L^2(S^1)}
\le\frac{\varepsilon}{2}\|w_r\|^2_{L^2(S^1)}+C\|v\|^2_{L^2(S^1)}.
\end{split}
\end{equation*}
Thus, with a constant $C=C(\varepsilon)>0$ we find
\begin{equation}\label{5.9}
\frac{d}{dt}\|\nabla w\|^2_{L^2(B)}+\varepsilon\|w_r\|^2_{L^2(S^1)}\le C\|v\|^2_{L^2(S^1)}.
\end{equation}
Similarly, from \eqref{5.8} we can bound
\begin{equation}\label{5.10}
\|w_t\|^2_{L^2(S^1)}\le C\|w_r\|^2_{L^2(S^1)}+C\|v\|^2_{L^2(S^1)}.
\end{equation}
Integrating over $0\le t\le T$ and observing that we have
\begin{equation*}
\sup_{0\le t\le T}\|v(t)\|^2_{L^2(S^1)}\le\big(\int_0^T\|v_t(t)\|_{L^2(S^1)}dt\big)^2
\le T\int_0^T\|v_t(t)\|^2_{L^2(S^1)}dt,
\end{equation*}
from \eqref{5.9} we first obtain
\begin{equation*}
\begin{split}
\sup_{0\le t\le T}\|\nabla w(t)\|^2_{L^2(B)}+\varepsilon\|w_r\|^2_{L^2([0,T]\times S^1)}
\le CT\sup_{0\le t\le T}\|v(t)\|^2_{L^2(S^1)}\le CT^2|v|^2_X,
\end{split}
\end{equation*}
which we may use together with \eqref{5.10} to bound
\begin{equation*}
\begin{split}
|w|^2_X&=\sup_{0\le t\le T}\|\nabla w(t)\|^2_{L^2(B)}+\|w_t\|^2_{L^2([0,T]\times S^1)}
\le CT^2|v|^2_X.
\end{split}
\end{equation*}
For sufficiently small $T>0$ then our claim follows.
\end{proof}

Thus, by Banach's fixed point theorem,
for any $\varepsilon>0$, any smooth $u_0\in H^{1/2}(S^1;N)$ there exists $T>0$ and a solution $u=u(t)\in V$ of the initial value problem \eqref{5.1}, \eqref{1.4}. We now show that the number $T>0$ may be chosen uniformly as $\varepsilon\downarrow 0$.
Indeed, we have the following result.

\begin{lemma}\label{lemma5.4}
There exists a constant $C>0$ such that for any $k\ge 2$, any smooth
$u_0\in H^{1/2}(S^1;N)$, and any $0<\varepsilon\le 1/2$ for the solution
$u$ to \eqref{5.1} with $u(0)=u_0$ there holds
\begin{equation*}
\frac{d}{dt}\big(\|\nabla\partial^k_{\phi}u\|^2_{L^2(B)}\big)
\le C(1+\|\nabla u\|^2_{L^2(B)}+\|\nabla\partial^k_{\phi}u\|_{L^2(B)})^{k+3}.
\end{equation*}
\end{lemma}

\begin{proof}
Similar to the proof of Lemma \ref{lemma5.2}, for given $2\le k\in\mathbbm N$ we compute
\begin{equation}\label{5.11}
\begin{split}
\frac12&\frac{d}{dt}\big(\|\nabla\partial^k_{\phi}u\|^2_{L^2(B)}\big)
=(-1)^k\int_B\nabla\partial^{2k}_{\phi}u\nabla u_t\;dx\\
&=(-1)^k(\partial^{2k}_{\phi}u_r,u_t)_{L^2(S^1)}
=(-1)^{k+1}(\partial^{2k}_{\phi}u_r,
(\varepsilon+d\pi_N(u))u_r)_{L^2(S^1)}\\
&\le-\|d\pi_N(u)\partial^k_{\phi}u_r\|^2_{L^2(S^1)}-I,
\end{split}
\end{equation}
where we now drop the term $\varepsilon\|\partial^k_{\phi}u_r\|^2_{L^2(S^1)}$ from \eqref{5.7}.
Again we split $I=\sum_{j=1}^k\Big({k\atop j}\Big)I_j$ with
\begin{equation*}
\begin{split}
I_j&=(\partial^k_{\phi}u_r,\partial^j_{\phi}(d\pi_N(u))
\partial^{k-j}_{\phi}u_r)_{L^2(S^1)}\\
&=(\nabla\partial^k_{\phi}u,\nabla(\partial^j_{\phi}(d\pi_N(u))
\partial^{k-j}_{\phi}u_r))_{L^2(B)},
\end{split}
\end{equation*}
but now we bound these terms as in the proof of Lemma \ref{lemma4.3} via
\begin{equation*}
\begin{split}
|I_j|&\le C\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\big(\|\nabla\partial^j_{\phi}(d\pi_N(u))\partial^{k-j}_{\phi}u_r\|_{L^2(B)}
+\|\partial^j_{\phi}(d\pi_N(u))\nabla\partial^{k-j}_{\phi}u_r\|_{L^2(B)}\big)\\
&\le C\sum_{1\le j_i\le k+1,\,\Sigma_ij_i=k+2}\|\nabla\partial^k_{\phi}u\|_{L^2(B)}
\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}.
\end{split}
\end{equation*}
Using that for any $k\ge 2$ by Sobolev's embedding
$H^2(B)\hookrightarrow W^{1,4}\cap C^0(\bar{B})$
we can bound
\begin{equation*}
\begin{split}
\sum_{1\le j_i\le k+1,\,\Sigma_ij_i=k+2}\|\Pi_i\nabla^{j_i}u\|_{L^2(B)}
\le C(1+\|\nabla u\|_{L^2(B)}+\|\nabla^{k+1}u\|_{L^2(B)})^{k+2},
\end{split}
\end{equation*}
and also using \eqref{4.3}, we obtain the claim.
\end{proof}

We now are able to conclude.

\begin{proposition}\label{prop5.5}
For any $k\ge 2$, any smooth $u_0\in H^{1/2}(S^1;N)$ there exists
$T>0$ and a solution $u\in V$ to \eqref{1.3} with initial data $u(0)=u_0$.
\end{proposition}

\begin{proof}
In view of Lemma \ref{lemma5.4}, there exists a uniform number $T>0$ such that, with $V$ as defined above, for any $0<\varepsilon\le 1/2$ there exists a solution
$u_{\varepsilon}\in V$ to \eqref{5.1}. By definition of $V$, as $\varepsilon\downarrow 0$
suitably, we have $u_{\varepsilon}\to u$ weakly-$*$ in
$L^{\infty}([0,T];H^{k+1}(B))\cap H^1(S^1\times[0,T])$. But this suffices to pass to the limit
$\varepsilon\downarrow 0$ in \eqref{5.1}, and $u\in V$ solves \eqref{1.3} with $u(0)=u_0$.
\end{proof}

\begin{proof}[Proof of Theorem \ref{thm1.1}.i).]
By Proposition \ref{prop5.5} for any smooth $u_0\in H^{1/2}(S^1;N)$ and any $k\ge 2$ there exists $T>0$ and a solution $u\in V$ of \eqref{1.3}, \eqref{1.4} for $0<t<T$.
Alternatingly employing Propositions \ref{prop4.11} and \ref{prop4.6}, we then obtain smoothness of $u$ for $0<t\le T$, including the final time $T$. (This argument later appears in more detail in Section \ref{Weak solutions} after Lemma \ref{lemma6.2}.)
Iterating, the solution $u$
may be extended smoothly until some maximal time $T_0$ where condition \eqref{4.4}
ceases to hold. Uniqueness (even within a much larger class of competing functions)
is established in Section \ref{Uniqueness}.
\end{proof}

\section{Weak solutions}\label{Weak solutions}
Given $u_0\in H^{1/2}(S^1;N)$, there are smooth functions
$u_{0k}\in H^{1/2}(S^1;N)$ with $u_{0k}\to u_0$ in $H^1(B)$ as $k\to\infty$.
Indeed, similar to an argument of Schoen-Uhlenbeck \cite{Schoen-Uhlenbeck-1982}, Theorem 3.1,
with a standard mollifying sequence $(\rho_k)_{k\in\mathbbm N}$ for the mollified functions
$v_{0k}:= u_0*\rho_k$ we have $dist_N(v_{0k})\to 0$ uniformly, and
$u_{0k}:=\pi_N(v_{0k})\to u_0\in H^{1/2}(S^1;N)$ as $k\to\infty$.

Let $u_k$ be the corresponding solutions of \eqref{1.3}
with initial data $u_k(0)=u_{0k}$, defined on a maximal time interval $[0,T_k[$, $k\in\mathbbm N$.
We claim that each function $u_k$ can be smoothly extended to a uniform time interval $[0,T[$ for some $T>0$. To see this, we first establish the following non-concentration result.

\begin{lemma}\label{lemma6.1}
For any $\delta>0$ there exists a number $R>0$ and a time $T_0>0$ such that
\begin{equation*}
\sup_{z_0\in B,\,0<t<T_0}\int_{B_R(z_0)\cap B}|\nabla u_k(t)|^2dz<\delta
\ \hbox{ for all }k\in\mathbbm N.
\end{equation*}
\end{lemma}

\begin{proof}
Given $\delta>0$, by absolute continuity of the Lebesgue integral and $H^1$-convergence
$u_{0k}\to u_0$ ($k\to\infty$) we can find $R>0$ such that
\begin{equation*}
\sup_{z_0\in B}\int_{B_{2R}(z_0)\cap B}|\nabla u_{0k}|^2dz<\delta
\ \hbox{ for all }k\in\mathbbm N.
\end{equation*}
Choosing $T_0=\delta R$, by Lemma \ref{lemma2.2} then we have
\begin{equation*}
\sup_{z_0\in B,\,0<t<T_0}\int_{B_R(z_0)\cap B}|\nabla u_k(t)|^2dz
<4\delta+C\delta E(u_{0k})<L\delta
\end{equation*}
with a uniform constant $L>0$ for all $k\in\mathbbm N$. The claim follows, if we replace
$\delta$ with $\delta/L$.
\end{proof}

In view of Proposition \ref{prop3.3}, from Lemma \ref{lemma6.1} and Lemma \ref{lemma2.1}
we obtain the following bound for $u_k$ in $H^1(S^1)$.

\begin{lemma}\label{lemma6.2}
There exist a time $T_0>0$ and constants $C>0$, $C_0=C_0(E(u_0))>0$ such that
\begin{equation*}
\int_0^{T_0}\int_{S^1}|\partial_{\phi}u_k(t)|^2d\phi\,dt\le CE(u_{0k})\le C_0
\ \hbox{ for all }k\in\mathbbm N.
\end{equation*}
\end{lemma}

From Lemma \ref{lemma6.2} we obtain locally in time uniform smooth bounds for $(u_k)$ for $t>0$
by iteratively applying our previous regularity results. More precisely, Fatou's lemma and Lemma \ref{lemma6.2} first yield the bound
\begin{equation*}
\int_0^{T_0}\liminf_{k\to\infty}\big(\int_{S^1}|\partial_{\phi}u_k(t)|^2d\phi\big)dt
\le C_0.
\end{equation*}
Thus for almost every $0<t_0<T_0$ there holds
\begin{equation*}
\liminf_{k\to\infty}\int_{S^1}|\partial_{\phi}u_k(t_0)|^2d\phi<\infty.
\end{equation*}
For any such $0<t_0<T_0$, if $\delta>0$ is sufficiently small, from Proposition \ref{prop4.2}
with another appeal to Fatou's lemma we may conclude
\begin{equation*}
\int_{t_0}^{T_0}\liminf_{k\to\infty}\int_B|\nabla\partial_{\phi}u_k|^2dz\,dt
\le\liminf_{k\to\infty}\int_{t_0}^{T_0}\int_B|\nabla\partial_{\phi}u_k|^2dz\,dt\le C_1
\end{equation*}
for some $C_1>0$, so that now we even have
\begin{equation*}
\liminf_{k\to\infty}\int_B|\nabla\partial_{\phi}u_k(t_1)|^2dz<\infty
\end{equation*}
for almost every $t_0<t_1<T_0$. Hence we may next invoke Proposition \ref{prop4.4}
and \eqref{4.2} to obtain the bound
\begin{equation*}
\liminf_{k\to\infty}
\int_{t_1}^{T_0}\int_{\partial B}|\nabla\partial_{\phi}u_k|^2d\phi\,dt<\infty
\end{equation*}
for any such $t_0<t_1<T_0$, and Fatou's lemma gives that
\begin{equation*}
\liminf_{k\to\infty}\int_{\partial B}|\nabla\partial_{\phi}u_k(t_2)|^2d\phi<\infty
\end{equation*}
for almost every $t_1<t_2<T_0$. Now Proposition \ref{prop4.10} may be applied with $\varphi_0=1$,
and we obtain
\begin{equation*}
\liminf_{k\to\infty}
\int_{t_2}^{T_0}\int_B|\nabla\partial^2_{\phi}u_k|^2dz\,dt<\infty
\end{equation*}
for any such $t_1<t_2<T_0$. Another application of Fatou's lemma gives
\begin{equation*}
\liminf_{k\to\infty}\int_B|\nabla\partial^2_{\phi}u_k(t_3)|^2dz<\infty
\end{equation*}
for almost every $t_2<t_3<T_0$, and Proposition \ref{prop4.5} yields
\begin{equation*}
\liminf_{k\to\infty}
\int_{t_3}^{T_0}\int_{\partial B}|\nabla\partial^2_{\phi}u_k|^2d\phi\,dt<\infty
\end{equation*}
for any such $t_2<t_3<T_0$. We may then iterate, using \eqref{3.7} and alternatingly employing Propositions \ref{prop4.11} and \ref{prop4.6} for $3\le k\in\mathbbm N$, to find a subsequence
$(u_k)$ satisfying uniform smooth bounds on $]t_0,T_0]$ for any $t_0>0$. Passing to the limit
$k\to\infty$ for this subsequence we obtain a weak solution to \eqref{1.3}, \eqref{1.4}
of energy-class in the following sense.

\begin{definition}\label{def6.3}
A function $u\in H^1([0,T_0]\times S^1;N)\cap L^{\infty}([0,T_0]; H^{1/2}(S^1;N))$ is a weak solution of \eqref{1.3}, \eqref{1.4} of energy-class, if \eqref{1.3} is satisfied in the weak sense, that is, if there holds
\begin{equation}\label{6.1}
\begin{split}
\int_0^{T_0}\int_{\partial B}&(u_t+d\pi_N(u)u_r)\cdot\varphi d\phi\,dt\\
&=\int_0^{T_0}\int_{\partial B}u_t\cdot\varphi d\phi\,dt
+\int_0^{T_0}\int_B\nabla u\cdot\nabla\big(d\pi_N(u)\varphi\big)dz\,dt=0
\end{split}
\end{equation}
for all $\varphi\in C^{\infty}_c(S^1\times ]0,T_0[)$, and if there holds the energy inequality
\begin{equation}\label{6.2}
E(u(T))+\int_0^T\int_{\partial B}|u_t|^2d\phi\;dt\le E(u_0)
\end{equation}
for any $0<T<T_0$, with the initial data
$u_0\in H^{1/2}(S^1;N)$ being attained in the sense of traces.
\end{definition}

We then may summarize our results, as follows.

\begin{proposition}\label{prop6.4}
For any $u_0\in H^{1/2}(S^1;N)$ there exists $T_0>0$ and a weak solution $u$ to \eqref{1.3},
\eqref{1.4} on $[0,T_0]$ of energy-class, which is smooth for $t>0$.
\end{proposition}

\begin{proof}
For any open $U\subset S^1\times ]0,T_0[$ we have uniform smooth bounds for $u_k$ on $U$;
thus a suitable sub-sequence $u_k\to u$ smoothly locally as $k\to\infty$.
The equation \eqref{6.1} follows from the corresponding identities for $u_k$.

Moreover, \eqref{6.2} follows from the energy identity, Lemma \ref{lemma2.1}, for $u_k$ in view of
$H^1$-convergence $u_{0k}\to u_0$ as well as weak lower semi-continuity of the energy and of the $L^2$-norm.

Finally, with error $o(1)\to 0$ as $k\to\infty$ for $0<t<T_0$ we can estimate
\begin{equation*}
\begin{split}
\|&u(t)-u_0\|^2_{L^2(\partial B)}\le\|u_k(t)-u_{0k}\|^2_{L^2(\partial B)}+o(1)\\
&\le\Big(\int_0^t\|\partial_tu_k(t')\|_{L^2(\partial B)}dt'\Big)^2+o(1)
\le t\int_0^t\|\partial_tu_k(t')\|^2_{L^2(\partial B)}dt'+o(1)\\
&\le tE(u_0)+o(1)\to 0\ \hbox{ as } t\downarrow 0,
\end{split}
\end{equation*}
and $u(t)\to u_0$ weakly in $H^{1/2}(S^1;N)\cap H^1(B;\mathbbm R^n)$ as $t\downarrow 0$. In fact,
by \eqref{6.2} we then even have strong convergence.
\end{proof}

\section{Uniqueness}\label{Uniqueness}
With the help of the tools developed in Section \ref{A regularity estimate}
we can show uniqueness of partially regular weak energy-class solutions as in Proposition \ref{prop6.4}.

\begin{theorem}\label{thm7.1}
Let $u_0\in H^{1/2}(S^1;N)$. Suppose $u$ and $v$ both are weak energy-class solutions of \eqref{1.3}, \eqref{1.4} on $[0,T_0]$ for some $T_0>0$ with initial data $u_0$, and suppose that $u$ and $v$ are smooth for $t>0$. Then $u=v$.
\end{theorem}

\begin{proof}
Using the identity \eqref{3.2} for $u$ and $v$, respectively, for the function $w=u-v$
for almost every $0<t<T_0$ we have
\begin{equation}\label{7.1}
\begin{split}
&\partial_tw+\partial_rw=\nu(u)\partial_r(dist_N(u))-\nu(v)\partial_r(dist_N(v))\\
&\quad=(\nu(u)-\nu(v))\partial_r(dist_N(u))+\nu(v)\partial_r(dist_N(u)-dist_N(v))
\end{split}
\end{equation}
on $\partial B=S^1$.
From equation \eqref{3.5}, moreover, we obtain
\begin{equation}\label{7.2}
\begin{split}
|\Delta(dist_N(u)&-dist_N(v))|=|\nabla u\cdot d\nu(u)\nabla u-\nabla v\cdot d\nu(v)\nabla v|\\
&\le C(|w||\nabla u|^2+(|\nabla u|+|\nabla v|)|\nabla w|)\hbox{ in }B.
\end{split}
\end{equation}
Observing that
\begin{equation*}
|dist_N(u)-dist_N(v)|\le C|w|,
\end{equation*}
upon multiplying \eqref{7.2} with the function $(dist_N(u)-dist_N(v))\in H_0^1(B)$,
integrating by parts, and using Young's inequality, for any $\varepsilon>0$ we obtain
\begin{equation}\label{7.3}
\begin{split}
\|\nabla&(dist_N(u)-dist_N(v))\|^2_{L^2(B)}\\
&\le C\int_B(|w|^2|\nabla u|^2+(|\nabla u|+|\nabla v|)|\nabla w||w|)dz\\
&\le\varepsilon\|\nabla w\|^2_{L^2(B)}
+C(\varepsilon)\|w\|^2_{L^4(B)}(\|\nabla u\|^2_{L^4(B)}+\|\nabla v\|^2_{L^4(B)}).
\end{split}
\end{equation}
On the other hand, for any $0<t_0<T\le T_0$, multiplying the equation \eqref{7.1} with $w$
and integrating by parts on $S^1\times [t_0,T]$, upon letting $t_0\downarrow 0$ we find
\begin{equation*}
\begin{split}
\sup_{0<t<T}&\|w(t)\|^2_{L^2(\partial B)}+\int_0^T\int_B|\nabla w|^2dz\,dt
\le C\int_0^T\int_{\partial B}(\partial_tw+\partial_rw)w\,d\phi\,dt\\
&=C\int_0^T\int_{\partial B}w(\nu(u)-\nu(v))\partial_r(dist_N(u))d\phi\,dt\\
&\quad+C\int_0^T\int_{\partial B}w\,\nu(v)\partial_r(dist_N(u)-dist_N(v))\,d\phi\,dt
=:C\int_0^T(I+II)dt.
\end{split}
\end{equation*}
We first estimate the term
\begin{equation*}
\begin{split}
I&=I(t)=\int_{\partial B}w(\nu(u)-\nu(v))\partial_r(dist_N(u))\,d\phi\\
&=\int_B\nabla\big(w(\nu(u)-\nu(v))\big)\nabla(dist_N(u))\,dz\\
&\quad+\int_Bw(\nu(u)-\nu(v))\Delta(dist_N(u))\,dz.
\end{split}
\end{equation*}
Using
\begin{equation*}
\begin{split}
|\nabla\big(w(\nu(u)-\nu(v))\big)|&\le C|\nabla w||w|
+|w\big((d\nu(u)-d\nu(v))\nabla u+d\nu(v)\nabla w\big)|\\
&\le C(|\nabla w||w|+|w|^2|\nabla u|)
\end{split}
\end{equation*}
we can bound
\begin{equation*}
\begin{split}
|\int_B&\nabla\big(w(\nu(u)-\nu(v))\big)\nabla(dist_N(u))dz|
\le C\int_B(|\nabla w||w|+|w|^2|\nabla u|)|\nabla u|dz\\
&\le \varepsilon\|\nabla w\|^2_{L^2(B)}+C(\varepsilon)\|w\|^2_{L^4(B)}\|\nabla u\|^2_{L^4(B)}
\end{split}
\end{equation*}
for each $t$. Also using \eqref{3.5}, we can moreover estimate
\begin{equation*}
\begin{split}
|\int_B&w(\nu(u)-\nu(v))\Delta(dist_N(u))\,dz|
\le C\|w\|^2_{L^4(B)}\|\nabla u\|^2_{L^4(B)}
\end{split}
\end{equation*}
for almost every $0<t<T$ to obtain
\begin{equation*}
\begin{split}
|I|\le \varepsilon\|\nabla w\|^2_{L^2(B)}
+C(\varepsilon)\|w\|^2_{L^4(B)}\|\nabla u\|^2_{L^4(B)}.
\end{split}
\end{equation*}

Similarly, we estimate the term
\begin{equation*}
\begin{split}
II&=II(t)=\int_{\partial B}w\,\nu(v)\partial_r(dist_N(u)-dist_N(v))\,d\phi\\
&=\int_B\nabla(w\nu(v))\nabla(dist_N(u)-dist_N(v))dz\\
&\quad+\int_Bw\,\nu(v)\Delta(dist_N(u)-dist_N(v))\,dz.
\end{split}
\end{equation*}
Noting that with \eqref{7.3} we can bound
\begin{equation*}
\begin{split}
|\int_B&\nabla(w\nu(v))\nabla(dist_N(u)-dist_N(v))dz|\\
&\le C(\|\nabla w\|_{L^2(B)}+\|w\nabla v\|_{L^2(B)})\|\nabla(dist_N(u)-dist_N(v))\|_{L^2(B)}\\
&\le\varepsilon\|\nabla w\|^2_{L^2(B)}
+C(\varepsilon)\|w\|^2_{L^4(B)}(\|\nabla u\|^2_{L^4(B)}+\|\nabla v\|^2_{L^4(B)})
\end{split}
\end{equation*}
and that with \eqref{7.2} we have
\begin{equation*}
\begin{split}
|\int_B&w\nu(v)\Delta(dist_N(u)-dist_N(v))\,dz|\\
&\le C\int_B(|w|^2|\nabla u|^2+|w||\nabla w|(|\nabla u|+|\nabla v|))\,dz\\
&\le\varepsilon\|\nabla w\|^2_{L^2(B)}
+C(\varepsilon)\|w\|^2_{L^4(B)}(\|\nabla u\|^2_{L^4(B)}+\|\nabla v\|^2_{L^4(B)})
\end{split}
\end{equation*}
we find the estimate
\begin{equation*}
\begin{split}
|II|\le\varepsilon\|\nabla w\|^2_{L^2(B)}+C(\varepsilon)\|w\|^2_{L^4(B)}
(\|\nabla u\|^2_{L^4(B)}+\|\nabla v\|^2_{L^4(B)})
\end{split}
\end{equation*}
for almost every $0<t<T$.

But Sobolev's embedding $H^{1/2}(B)\hookrightarrow L^4(B)$ and Fourier expansion give the bound
\begin{equation*}
\|w\|^2_{L^4(B)}\le C\|w\|^2_{H^{1/2}(B)}\le C\|w\|^2_{L^2(\partial B)}
\end{equation*}
and similar bounds for $\nabla u$ as well as $\nabla v$.
Moreover, since by the energy inequality \eqref{6.2} we have $u(t),v(t)\to u_0$ strongly in $H^1(B)$ as $t\downarrow 0$, there exist a radius $0<R\le1/2$ and a time $0<T<T_0$
such that condition \eqref{3.13} in Proposition \ref{prop3.3} holds true on
$[0,T]$ for both $u$ and $v$, allowing to bound
\begin{equation*}
\begin{split}
\int_0^T&\|\nabla u(t)\|^2_{L^4(B)}dt
\le C\int_0^T\|\nabla u(t)\|^2_{L^2(\partial B)}dt
\le C\int_0^T\|\partial_{\phi}u(t)\|^2_{L^2(\partial B)}dt\\
&\le C\int_0^T\int_{\partial B}|u_t|^2d\phi\,dt+C(R)TE(u_0)\le C(R)(1+T_0)E(u_0)
\end{split}
\end{equation*}
with the help of \eqref{3.7}, and similarly for $|\nabla v|$.
Choosing $\varepsilon=1/4$, for sufficiently small $0<T<T_0$ by absolute continuity of the integral we thus can estimate
\begin{equation*}
\begin{split}
&\sup_{0<t<T}\|w(t)\|^2_{L^2(\partial B)}+\int_0^T\int_B|\nabla w|^2dz\,dt\\
&\quad\le\frac12\|\nabla w\|^2_{L^2(B\times[0,T])}
+C\sup_{0<t<T}\|w(t)\|^2_{L^2(\partial B)}
\int_0^T(\|\nabla u\|^2_{L^4(B)}+\|\nabla v\|^2_{L^4(B)})dt\\
&\quad\le\frac12\Big(\sup_{0<t<T}\|w(t)\|^2_{L^2(\partial B)}
+\int_0^T\int_B|\nabla w|^2dz\,dt\Big),
\end{split}
\end{equation*}
and it follows that $w=0$, as claimed.
\end{proof}

\begin{proof}[Proof of Theorem \ref{thm1.2}]
Existence for short time and uniqueness of a partially regular weak solution to \eqref{1.3}, \eqref{1.4} for given data $u_0\in H^{1/2}(S^1;N)$ follow from Proposition \ref{prop6.4} and Theorem \ref{thm7.1}, respectively.
Since by Proposition \ref{prop6.4} our weak solution is smooth for $t>0$, the remaining assertions follow from Theorem \ref{thm1.1}.

Note that at any blow-up time $T_{i-1}$, $i\ge 1$, of the flow as in Theorem \ref{thm1.1}.ii)
there exists a unique weak limit $u_i=\lim_{t\uparrow T_{i-1}}u(t)\in H^{1/2}(S^1;N)$,
and we may uniquely continue the flow using Proposition \ref{prop6.4}.
\end{proof}

\section{Blow-up}\label{Blow-up}
Preparing for the proof of part ii) of Theorem \ref{thm1.1} suppose now that for the solution constructed in part i) of that theorem there holds $T_0<\infty$.
Then, as we shall see in more detail below, by the results in Section \ref{Higher regularity}
condition \eqref{4.4} must be violated for $T=T_0$ and there exist $\delta>0$ and points
$z_k\in B$ as well as radii $r_k\downarrow 0$ as $k\to\infty$ such that for suitable
$t_k\uparrow T_0$ there holds
\begin{equation*}
\int_{B_{r_k}(z_k)\cap B}|\nabla u(t_k)|^2dz
=\sup_{z_0\in B,\,t\le t_k}\int_{B_{r_k}(z_0)\cap B}|\nabla u(t)|^2dz=\delta.
\end{equation*}
We may later choose a smaller constant $\delta>0$, if necessary. Moreover, for later use from now on we consider local concentrations in the sense that for some $z_0\in B$ and some fixed radius $r_0>0$ for a sequence of points $z_k\in B$ with $z_k\to z_0$ and radii
$r_k\downarrow 0$ for suitable $t_k\uparrow T_0$ as $k\to\infty$ there holds
\begin{equation*}
\int_{B_{r_k}(z_k)\cap B}|\nabla u(t_k)|^2dz
=\sup_{z'\in B_{r_0}(z_0),\,t\le t_k}\int_{B_{r_k}(z')\cap B}|\nabla u(t)|^2dz=\delta.
\end{equation*}

Scale
\begin{equation*}
u_k(z,t)=u(z_k+r_kz,t_k+r_kt)
\end{equation*}
for
\begin{equation*}
z\in\Omega_k=\{z;\;z_k+r_kz\in B\},\ t\in I_k=\{t;\; 0\le t_k+tr_k<T_0\}.
\end{equation*}
Note that then there holds
\begin{equation}\label{8.1}
\begin{split}
\int_{B_1(0)\cap\Omega_k}&|\nabla u_k(0)|^2dz\\
&=\sup_{z_k+r_kz'\in B_{r_0}(z_0),-t_k/r_k\le t<0}
\int_{B_1(z')\cap\Omega_k}|\nabla u_k(t)|^2dz=\delta.
\end{split}
\end{equation}
Passing to a sub-sequence we may assume that the domains $\Omega_k$ exhaust a limit domain
$\Omega_{\infty}\subset\mathbbm R^2$, which either is the whole space $\mathbbm R^2$ or a half-space $H$.

By the energy inequality Lemma \ref{lemma2.1} for $t\in I_k$
there holds
\begin{equation}\label{8.2}
\int_{\Omega_k}|\nabla u_k(t)|^2dz=\int_{B}|\nabla u(t_k+r_kt)|^2dz\le 2E(u_0),
\end{equation}
and for any $t_0<0$ and sufficiently large $k\in\mathbbm N$ we have
\begin{equation}\label{8.3}
\begin{split}
\int_{t_0}^0&\int_{\partial\Omega_k}|\partial_tu_k|^2ds\;dt
=\int_{t_0}^0\int_{\partial\Omega_k}|d\pi_N(u_k)\partial_{\nu_k}u_k|^2ds\;dt\\
&=\int_{t_k+r_kt_0}^{t_k}\int_{\partial B}|u_t|^2d\phi\;dt
\le\int_{t_k+r_kt_0}^{T_0}\int_{\partial B}|u_t|^2d\phi\;dt\to 0
\end{split}
\end{equation}
as $k\to\infty$, where $ds$ is the element of length and where $\nu_k$ is the outward unit normal along $\partial\Omega_k$.
Expressing the harmonic functions $\partial_tu_k(t)$ in Fourier series for each $t<0$,
it then also follows that $\partial_tu_k\to 0$ locally in
$L^2$ on $\Omega_{\infty}\times]-\infty,0[$. Finally, again using the fact that
$u_k(t)$ for each $t$ is harmonic, by the maximum principle we have the uniform bound
$|u_k|\le\sup_{p\in\Gamma}|p|$ as well as uniform smooth bounds locally away from the boundary of $\Omega_{\infty}$.

Hence we may assume that as $k\to\infty$ we have $u_k\to u_{\infty}$ weakly locally in
$H^1$ on $\Omega_{\infty}\times]-\infty,0[$, where $u_{\infty}(z,t)=u_{\infty}(z)$ is independent of time, harmonic, and bounded. Moreover, we have smooth convergence away from
$\partial\Omega_{\infty}$.
Thus, if we assume that $\Omega_{\infty}=\mathbbm R^2$ by \eqref{8.1} it follows that
\begin{equation*}
\int_{B_1(0)}|\nabla u_{\infty}|^2dz=\delta.
\end{equation*}
But any function $v\colon\mathbbm R^2\to\mathbbm R$ which is bounded and harmonic must be constant,
which rules out this possibility. Hence $\Omega_{\infty}$ can only be a half-space.

After a suitable rotation of the domain $B$ and shift of coordinates in $\mathbbm R^2\cong\mathbbm C$
we may then assume that $z_k=(0,-y_k)$ with $1-y_k\le Mr_k$ for some $M\in\mathbbm N$ and that
$\Omega_{\infty}=\{(x,y);\;y>y_0\}$ for some $y_0$. Finally, replacing $r_k>0$ with $(M+1)r_k$
and $z_k$ with $z_k=(0,-1)$, if necessary, we may assume that
$\Omega_k\subset\mathbbm R^2_+=\{(x,y);\;y>0\}$ is the ball of radius $1/r_k$ around the point
$(0,1/r_k)$ with $0\in\partial\Omega_k$,
while from \eqref{8.1} with a uniform number $L\in\mathbbm N$ we have
\begin{equation}\label{8.4}
L\int_{B_1(0)\cap\Omega_k}|\nabla u_k(0)|^2dz
\ge L\delta\ge\sup_{|z'|\le r_0/r_k,-t_k/r_k\le t<0}
\int_{B_1(z')\cap\Omega_k}|\nabla u_k(t)|^2dz
\end{equation}
for any $k\in\mathbbm N$. Let $\Phi_k\colon\mathbbm R^2_+\to\Omega_k$ be the conformal maps given by
\begin{equation*}
\Phi_k(z)=\frac{2z}{2-ir_kz},\ z\in\mathbbm R^2_+,\ k\in\mathbbm N,
\end{equation*}
with $\Phi_k\to id$ locally uniformly on $\mathbbm R^2\cong\mathbbm C$ as $k\to\infty$.

Let $v_k=u_k\circ\Phi_k$, $k\in\mathbbm N$. By conformal invariance of the Dirichlet energy, from \eqref{8.2} for any $t$ we have
\begin{equation}\label{8.5}
\int_{\mathbbm R^2_+}|\nabla v_k(t)|^2dz=\int_{\Omega_k}|\nabla u_k(t)|^2dz\le 2E(u_0),
\end{equation}
and by \eqref{8.4} with a uniform number $L_1\in\mathbbm N$
there holds
\begin{equation}\label{8.6}
L_1\int_{B^+_2(0)}|\nabla v_k(0)|^2dz\ge L_1\delta
\ge\sup_{|z'|\le r_0/r_k,-t_k/r_k\le t<0}\int_{B^+_1(z')}|\nabla v_k(t)|^2dz,
\end{equation}
where $B^+_r(z)=B_r(z)\cap\mathbbm R^2_+$ for any $r>0$ and any $z=(x,y)\in\mathbbm R^2$.
Moreover, from \eqref{8.3} for any $t_0<0$ and any $R>0$ for the integral over
$]-R,R\,[\times\{0\}\subset\partial\mathbbm R^2_+$ we obtain
\begin{equation}\label{8.7}
\begin{split}
\int_{t_0}^0\int_{-R}^R&|\partial_tv_k|^2dx\;dt\\
&\le C\int_{t_0}^0\int_{-R}^R|d\pi_N(v_k)\partial_yv_k|^2dx\;dt\to 0\ \hbox{ as }k\to\infty,
\end{split}
\end{equation}
and $\partial_tv_k\to 0$ locally in $L^2$ on $\overline{\mathbbm R^2_+}\times]-\infty,0[$.
In addition, from our choice of $(u_k)$ it follows that $v_k\to v_{\infty}$ weakly locally in $H^1$ on $\overline{\mathbbm R^2_+}\times]-\infty,0[$ as $k\to\infty$,
where $v_{\infty}(z,t)=:w_{\infty}(z)$ is harmonic and bounded.

For a suitable sequence of times $t_0<s_k<0$,
we then also have locally weak convergence $w_k:=v_k(s_k)\to w_{\infty}$ in $H^1$ on
$\overline{\mathbbm R^2_+}$ and, in addition,
\begin{equation}\label{8.8}
d\pi_N(w_k)\partial_yw_k\to 0\ \hbox{ in } L_{loc}^2(\partial\mathbbm R^2_+)\ \hbox{ as }k\to\infty.
\end{equation}
Thus, for sufficiently small $\delta>0$ by Proposition \ref{prop3.3}, applied to the functions
$w_k\circ\Psi$, where $\Psi\colon B\to\mathbbm R^2_+$ is a suitable conformal map, we also have uniform local $L^2$-bounds for $\partial_xw_k$ on $\partial\mathbbm R^2_+$, and we may assume that
$w_k\to w_{\infty}$ locally uniformly and weakly locally in $H^1$ on $\partial\mathbbm R^2_+$ as $k\to\infty$. Since $w_k$ is harmonic,
we then also have locally strong $H^1$-convergence $w_k\to w_{\infty}$ on $\overline{\mathbbm R^2_+}$.

To see that $w_{\infty}$ is non-constant, let $\varphi_k=\varphi_{z_0,4r_k}$, $k\in\mathbbm N$.
Integrating the identity \eqref{2.1} from the proof of Lemma \ref{lemma2.2} in time,
with error $o(1)\to 0$ and suitable numbers $\varepsilon_k\downarrow 0$ as $k\to\infty$ in view of \eqref{8.3} we find
\begin{equation}\label{8.9}
\begin{split}
\frac12\big|\int_B&|\nabla u(t_k)|^2\varphi_k^2dz
-\int_B|\nabla u(t_k+r_ks_k)|^2\varphi_k^2dz\big|\\
&\le\int_{t_k+r_ks_k}^{t_k}\int_{\partial B}|u_t|^2\varphi_k^2d\phi\,dt
+2\int_{t_k+r_ks_k}^{t_k}\int_B|u_t\nabla u\varphi_k\nabla\varphi_k|dz\,dt\\
&\le o(1)+8\varepsilon_kr_k\int_{t_k+r_ks_k}^{t_k}\int_B|\nabla u|^2|\nabla\varphi_k|^2dz\,dt\\
&\qquad+(8\varepsilon_kr_k)^{-1}\int_{t_k+r_ks_k}^{t_k}\int_B|u_t|^2\varphi_k^2dz\,dt.
\end{split}
\end{equation}
With the help of \eqref{2.2} and \eqref{8.3} for suitable $\varepsilon_k\downarrow 0$
we can bound
\begin{equation*}
\begin{split}
(8\varepsilon_kr_k)^{-1}\int_{t_k+r_ks_k}^{t_k}\int_B|u_t|^2\varphi_k^2dz\,dt
\le C\varepsilon_k^{-1}\int_{t_k+r_ks_k}^{t_k}\int_{\partial B}|u_t|^2d\phi\,dt\to 0.
\end{split}
\end{equation*}
Since for any choice $t_0<s_k<0$ we also can estimate
\begin{equation*}
\begin{split}
8\varepsilon_kr_k\int_{t_k+r_ks_k}^{t_k}\int_B|\nabla u|^2|\nabla\varphi_k|^2dz\,dt
\le C\varepsilon_k|t_0|E(u_0)\to 0,
\end{split}
\end{equation*}
from \eqref{8.9} and \eqref{8.6} it follows that with error $o(1)\to 0$ as $k\to\infty$
we have
\begin{equation}\label{8.10}
\begin{split}
L_1\int_{B^+_4(0)}&|\nabla w_k|^2dz+o(1)=L_1\int_{B^+_4(0)}|\nabla v_k(s_k)|^2dz+o(1)\\
&\ge L_1\int_{B}|\nabla u(t_k+r_ks_k)|^2\varphi_k^2dz+o(1)
\ge L_1\int_{B}|\nabla u(t_k)|^2\varphi_k^2dz\\
&\ge L_1\int_{B^+_2(0)}|\nabla v_k(0)|^2dz\ge L_1\delta.
\end{split}
\end{equation}

Finally, in view of locally uniform convergence $w_k\to w_{\infty}$ and weak local
$L^2$-convergence of the traces $\nabla w_k\to\nabla w_{\infty}$ on $\partial\mathbbm R^2_+$,
we may pass to the limit $k\to\infty$ in \eqref{8.8} to conclude that
\begin{equation}\label{8.11}
d\pi_N(w_{\infty})\partial_yw_{\infty}=0\ \hbox{ on }\partial\mathbbm R^2_+.
\end{equation}

Since $w_{\infty}$ is harmonic, the Hopf differential
\begin{equation*}
f=|\partial_xw_{\infty}|^2-|\partial_yw_{\infty}|^2
-2i\partial_xw_{\infty}\cdot\partial_yw_{\infty}
\end{equation*}
defines a holomorphic function $f\in L^1(\mathbbm R^2_+,\mathbbm C)$.
Moreover, $w_{\infty}\in H^{3/2}_{loc}(\mathbbm R^2_+)$
with trace $\nabla w_{\infty}\in L^2_{loc}(\partial\mathbbm R^2_+)$;
thus also the trace of $f$ is well-defined on $\partial\mathbbm R^2_+$. By \eqref{8.11} now the trace of $f$ is real-valued; thus $f\equiv c$ for some constant $c\in\mathbbm R$.
But $\nabla w_{\infty}\in L^2(\mathbbm R^2_+)$; hence $f\in L^1(\mathbbm R^2_+)$. It follows that $c=0$,
and $w_{\infty}$ is conformal.

With stereographic projection
$\Phi\colon B\to\mathbbm R^2_+$ from a point $z_0\in\partial B$
define the map $\bar{u}=w_{\infty}\circ\Phi\in H^{1/2}(S^1;N)$.
By conformal invariance, $\bar{u}$ again is harmonic with finite Dirichlet integral and satisfies \eqref{1.6}
on $\partial B\setminus\{z_0\}$;
since the point $\{z_0\}$ has vanishing $H^1$-capacity, $\bar{u}$ then is stationary in the sense of \cite{Gruter-et-al-1981}.
Moreover, $\bar{u}$ is conformal. For such mappings, smooth regularity on $\bar{B}$
was shown by Gr\"uter-Hildebrandt-Nitsche \cite{Gruter-et-al-1981};
thus condition \eqref{1.6} holds everywhere on $\partial B$ in the pointwise sense,
and $\bar{u}$ parametrizes a minimal surface of finite area supported by $N$ which meets $N$ orthogonally along its boundary.

\begin{proof}[Proof of Theorem \ref{thm1.1}.ii)]
For given smooth data $u_0\in H^{1/2}(S^1;N)$ let $u$ be the unique solution to \eqref{1.3}, \eqref{1.4} guaranteed by part i) of the theorem, and suppose that the maximal time of existence $T_0<\infty$. Then condition \eqref{4.4} must fail as
$t\uparrow T_0$; else from Propositions \ref{prop4.11} and \ref{prop4.6} we obtain smooth bounds for $u(t)$ as $t\uparrow T_0$ and there exists a smooth trace
$u_1=\lim_{t\uparrow T_0}u(t)$. But by the first part of the theorem there is a smooth solution to the initial value problem for \eqref{1.3} with initial data $u_1$ at time $T_0$,
and this solution extends the original solution $u$ to an interval $[0,T_1[$ for some
$T_1>T_0$, contradicting maximality of $T_0$.

Let $z^{(i)}\in B$, $1\le i\le i_0$, such that for some number $\delta>0$ and suitable $t_k^{(i)}\uparrow T_0$, $z_k^{(i)}\to z^{(i)}$, $r_k^{(i)}\to 0$ as $k\to\infty$
there holds
\begin{equation*}
\liminf_{k\to\infty}\int_{B_{r_k^{(i)}}(z_k^{(i)})\cap B}|\nabla u(t_k^{(i)})|^2dz\ge\delta.
\end{equation*}
By the argument following \eqref{8.9} thus for a suitable sequence of radii
$0<r_k^{(0)}\to 0$ such that $r_k^{(i)}/r_k^{(0)}\to 0$ as well as $(T_0-t_k^{(i)})/r_k^{(0)}\to 0$
then with error $o(1)\to 0$ as $k\to\infty$ there holds
\begin{equation*}
\int_{B_{r_k^{(0)}}(z^{(i)})\cap B}|\nabla u(t)|^2dz+o(1)
\ge\int_{B_{r_k^{(i)}}(z_k^{(i)})\cap B}|\nabla u(t_k^{(i)})|^2dz\ge\delta
\end{equation*}
for all $T_0-r_k^{(0)}<t<T_0$, uniformly in $1\le i\le i_0$.
For sufficiently large $k\in\mathbbm N$ such that $r_k^{(0)}<\inf_{i<j}|z^{(i)}-z^{(j)}|/4$ it follows that $i_0\le E(u_0)/\delta$, and we may fix $r_0>0$ and redefine $t_k^{(i)}$,
$r_k^{(i)}$, and $z_k^{(i)}$, if necessary, such that for each $1\le i\le i_0$ there holds
\begin{equation*}
\int_{B_{r_k^{(i)}}(z_k^{(i)})\cap B}|\nabla u(t_k^{(i)})|^2dz
=\sup_{z'\in B_{r_0}(z^{(i)}),\,0<t\le t_k^{(i)}}\int_{B_{r_k^{(i)}}(z')\cap B}|\nabla u(t)|^2dz=\delta.
\end{equation*}
Moreover, we may assume that $\delta<\delta_0$, as defined in Proposition \ref{prop3.1}.
The characterization of the concentration points as in Theorem \ref{thm1.2}.ii) via solutions $\bar{u}^{(i)}$ of \eqref{1.6} then follows from our above analysis.

In addition, Corollary \ref{cor3.2} yields the uniform lower bound
\begin{equation*}
\lim_{r_0\downarrow 0}\liminf_{t\uparrow T_0}\int_{B_{r_0}(z^{(i)})\cap B}|\nabla u(t)|^2dz
\ge 2E(\bar{u}^{(i)})\ge 2\delta_0^2
\end{equation*}
for the concentration energy quanta, which gives the claimed upper bound for the total number of concentration points.

Finally, with the help of Proposition \ref{prop4.11} we can smoothly extend the solution $u$ to
$B\setminus\{z^{(1)},\dots,z^{(i_0)}\}$ at time $t=T_0$.
\end{proof}

\section{Asymptotics}\label{Asymptotics}
Suppose next that the solution $u$ to \eqref{1.3}, \eqref{1.4} exists for all time $0<t<\infty$.
Then $u$ either concentrates for suitable $t_k\uparrow\infty$ in the sense that condition \eqref{4.4} does not hold true uniformly in time, or $u$ satisfies uniform smooth bounds, as shown in Section \ref{Higher regularity}.

In the latter case, the claim made in Theorem \ref{thm1.1}.iii) easily follows.

\begin{proposition}\label{prop9.1}
Suppose that for any $\delta>0$ there exists $R>0$ such that condition \eqref{4.4} holds true for all $0<t<\infty$. Then there exists a smooth solution $u_{\infty}\in H^{1/2}(S^1;N)$ of
\eqref{1.6} such that $u(t)\to u_{\infty}$ smoothly as $t\to\infty$ suitably,
and $u_{\infty}$ parametrizes a minimal surface of finite area supported by $N$ which meets
$N$ orthogonally along its boundary.
\end{proposition}

\begin{proof}
For sufficiently small $\delta>0$, for any $j\in\mathbbm N$ by iterative reference to Propositions \ref{prop4.2}, \ref{prop4.4}--\ref{prop4.6},
and \ref{prop4.10}, \ref{prop4.11}, respectively, as in Section \ref{Weak solutions}
we can find constants $C_j>0$ such that $\|u(t)\|_{H^j(B)}\le C_j$ for all $t>1$. Moreover,
by the energy inequality Lemma \ref{lemma2.1} for a suitable sequence $t_k\to\infty$
there holds $u_t(t_k)\to 0$ in $L^2(\partial B)$ as $k\to\infty$. Then for any $j\in\mathbbm N$ a subsequence $u(t_k)\to u_{\infty}$ in $H^j(B)$, and a diagonal subsequence converges smoothly,
where $u_{\infty}$ solves \eqref{1.6}. By the argument after \eqref{8.11} in Section \ref{Blow-up} then $u_{\infty}$ is conformal and $u_{\infty}$ parametrizes a minimal surface with free boundary on $N$ which meets $N$ orthogonally along its boundary.
\end{proof}

In the remaining case that for some $\delta>0$ condition \eqref{4.4} fails to hold, there exists a sequence $t_k\uparrow\infty$ and points $z^{(1)},\dots,z^{(i_0)}$ such that for sequences
$z_k^{(i)}\to z^{(i)}$, radii $r_k^{(i)}\to 0$ as $k\to\infty$ there holds
\begin{equation*}
\liminf_{k\to\infty}\int_{B_{r_k^{(i)}}(z_k^{(i)})\cap B}|\nabla u(t_k)|^2dz\ge\delta,\
1\le i\le i_0.
\end{equation*}
By Lemma \ref{lemma2.1} there holds the a-priori bound $i_0\le E(u_0)/\delta$ for the number of concentration points. By the argument leading to \eqref{8.10} then for a suitable number
$0<r_0\le\inf_{i<j}|z^{(i)}-z^{(j)}|/4$ with error $o(1)\to0$ as $k\to\infty$
and with some constant $L\in\mathbbm N$ for all $1\le i\le i_0$ there holds
\begin{equation*}
\begin{split}
L\int_{B_{2r_k^{(i)}}(z_k^{(i)})\cap B}&|\nabla u(t_k)|^2dz+o(1)\\
&\ge\sup_{z_0\in B_{r_0}(z_k^{(i)}),\,t_k-r_0\le t\le t_k}
\int_{B_{r_k^{(i)}}(z_0)\cap B}|\nabla u(t)|^2dz\ge\delta.
\end{split}
\end{equation*}
Fixing any index $1\le i\le i_0$ and renaming $z_k^{(i)}=:z_k$, $r_k^{(i)}=:r_k$, we then scale
\begin{equation*}
u_k(z,t)=u(z_k+r_kz,t_k+r_kt), \ z\in\Omega_k=\{z;z_k+r_kz\in B\},\ -t_k/r_k\le t\le 0,
\end{equation*}
as before and observe that for any $t_0<0$ there holds
\begin{equation}\label{9.1}
\begin{split}
\int_{t_0}^0&\int_{\partial\Omega_k}|\partial_tu_k|^2ds\;dt
=\int_{t_0}^0\int_{\partial\Omega_k}|d\pi_N(u_k)\partial_{\nu_k}u_k|^2ds\;dt\\
&=\int_{t_k+r_kt_0}^{t_k}\int_{\partial B}|u_t|^2d\phi\;dt
\le\int_{t_k+r_kt_0}^{\infty}\int_{\partial B}|u_t|^2d\phi\;dt\to 0
\end{split}
\end{equation}
as $k\to\infty$, where $\nu_k$ is the outward unit normal along $\partial\Omega_k$.
Just as in Section \ref{Blow-up} for suitable $t_0<s_k<0$ we then obtain local uniform and $H^1$-convergence of a subsequence of the conformally rescaled maps
$w_k=u_k(s_k)\circ\Phi_k\in H^1_{loc}(\mathbbm R^2_+)$ to a smooth, harmonic and conformal limit
$w_{\infty}$ with finite energy and continuously mapping $\partial\mathbbm R^2_+$ to $N$, inducing a solution $\bar{u}_{\infty}=w_{\infty}\circ\Phi\in H^{1/2}(S^1;N)$ of \eqref{1.6}
corresponding to a minimal surface with free boundary on $N$.
This ends the proof of Theorem \ref{thm1.1}.iii).

\section{Appendix}
In this section, for the convenience of the reader we derive two interpolation inequalities that play a crucial role in our arguments.

Let $v\in H^1(B)$, and let $\varphi_{z_i,r}$ as above such that the collection of balls $B_r(z_i)$, $1\le i\le i_0$ covers $\bar{B}$ with at most $L$ balls $B_{2r}(z_i)$
overlapping at any $z\in B$, with $L\in\mathbbm N$ independent of $r>0$.
We may assume $r<1/8$ so that for any $1\le i\le i_0$ there is a pair of orthogonal vectors $e_{1,i}$, $e_{2,i}$ such that for any $z\in B_r(z_i)$ there holds
$z+se_{1,i}+te_{2,i}\in B$ for any $0\le s,t\le 2r$. After a rotation of coordinates,
we may assume that $e_{1,i}=(1,0)$, $e_{2,i}=(0,1)$ are the standard basis vectors.
Writing $\varphi$ for $\varphi_{z_i,r}$ for any $z=(x,y)\in B_r(z_i)$,
by arguing as Ladyzhenskaya \cite{Ladyzhenskaya-1969}, using that
\begin{equation*}
(v^2\varphi)(x+2r,y)=0=(v^2\varphi)(x,y+2r),
\end{equation*}
then we can estimate
\begin{equation}\label{A.1}
\begin{split}
v^4(z)&=|(v^2\varphi)(z)|^2\le\int_0^{2r}|\partial_x(v^2\varphi)(x+s,y)|ds
\cdot\int_0^{2r}|\partial_y(v^2\varphi)(x,y+t)|dt\\
&\le\int_{\{s;(s,y)\in B\}}|\partial_x(v^2\varphi)(s,y)|ds
\cdot\int_{\{t;(x,t)\in B\}}|\partial_y(v^2\varphi)(x,t)|dt,
\end{split}
\end{equation}
and with the help of Fubini's theorem we find
\begin{equation*}
\begin{split}
&\int_{B_r(z_i)}|v|^4dz\le\int_B|v|^4\varphi^2dz
\le\int_{-\infty}^{\infty}\big(\int_{\{x;(x,y)\in B\}}|(v^2\varphi)(x,y)|^2dx\big)dy\\
&\le\int_{-\infty}^{\infty}\int_{\{s;(s,y)\in B\}}|\partial_x(v^2\varphi)(s,y)|ds\,dy
\cdot\int_{-\infty}^{\infty}\int_{\{t;(x,t)\in B\}}|\partial_y(v^2\varphi)(x,t)|dt\,dx\\
&\le\big(\int_B|\nabla(v^2\varphi)|dz\big)^2
\le\big(\int_B(2|\nabla v||v\varphi|+v^2|\nabla\varphi|)dz\big)^2\\
&\le C\big(\int_{B_{2r}(z_i)}|\nabla v|^2dz+r^{-2}\int_{B_{2r}(z_i)}v^2dz\big)
\int_{B_{2r}(z_i)}v^2dz.
\end{split}
\end{equation*}
Fixing $r=1/5$ and summing over $1\le i\le i_0$ with an absolute constant $C>0$
we obtain the bound
\begin{equation}\label{A.2}
\begin{split}
\|v\|^4_{L^4(B)}
\le C\|v\|^2_{H^1(B)}\|v\|^2_{L^2(B)}
\end{split}
\end{equation}
for any $v\in H^1(B)$.

\end{document}
\title{\bf Decentralized adaptation in interconnected uncertain systems with nonlinear parametrization}

\begin{abstract}
We propose a technique for the design and analysis of decentralized adaptation algorithms in interconnected dynamical systems. Our technique does not require Lyapunov stability of the target dynamics and allows nonlinearly parameterized uncertainties. We show that for the considered class of systems,
conditions for reaching the control goals can be formulated in terms of the nonlinear $L_2$-gains of target dynamics of each interconnected subsystem. Equations for decentralized controllers and corresponding adaptation algorithms are also explicitly provided.

{\it Keywords:} nonlinear parametrization; unstable,
non-equilibrium dynamics; decentralized adaptive control; monotone functions
\end{abstract}

\section*{Notation}

According to the standard convention, $\mathbb{R}$ defines the field of real numbers and $\mathbb{R}_{\geq c}=\{x\in\mathbb{R}|x\geq c\}$,
$\mathbb{R}_{+}=\mathbb{R}_{\geq 0}$; symbol $\mathbb{R}^n$ stands for a linear space $\mathcal{L}(\mathbb{R})$ over the field of reals with
$\mathrm{dim}\{\mathcal{L}(\mathbb{R})\}=n$; $\|\mathbf{x}\|$ denotes the Euclidean norm of $\mathbf{x}\in\mathbb{R}^n$; $\mathcal{C}^k$ denotes the space of functions that are at least $k$ times differentiable;
$\mathcal{K}$ denotes the class of all strictly increasing functions $\kappa: \mathbb{R}_+\rightarrow \mathbb{R}_+$ such that
$\kappa(0)=0$. By ${L}_{p}^n[t_0,T]$, where $T>0$, $p\geq 1$ we denote the space of all functions $\mathbf{f}:\mathbb{R}_+\rightarrow\mathbb{R}^n$
such that
$\|\mathbf{f}\|_{p,[t_0,T]}=\left(\int_{t_0}^T\|\mathbf{f}(\tau)\|^{p}d\tau\right)^{1/p}<\infty$;
$\|\mathbf{f}\|_{p,[t_0,T]}$ denotes the ${L}_{p}^n[t_0,T]$-norm of
$\mathbf{f}(t)$. By ${L}^n_\infty[t_0,T]$ we denote the space of all functions $\mathbf{f}:\mathbb{R}_+\rightarrow\mathbb{R}^n$ such that
$\|\mathbf{f}\|_{\infty,[t_0,T]}={\mathrm{ess}} \sup\{\|\mathbf{f}(t)\|,t \in
[t_0,T]\}<\infty$, and $\|\mathbf{f}\|_{\infty,[t_0,T]}$ stands for the
${L}^n_\infty[t_0,T]$ norm of $\mathbf{f}(t)$.

A function $\mathbf{f}(\mathbf{x}): \mathbb{R}^{n}\rightarrow \mathbb{R}^m$ is said to be locally bounded if for any $\|\mathbf{x}\|<\delta$ there exists a constant $D(\delta)>0$ such that the following inequality holds:
$\|\mathbf{f}(\mathbf{x})\|\leq D(\delta)$. Let $\Gamma$ be an $n\times n$
square matrix, then $\Gamma>0$ denotes a positive definite
(symmetric) matrix, and $\Gamma^{-1}$ is the inverse of $\Gamma$.
By $\Gamma\geq 0$ we denote a positive semi-definite matrix, and
$\|\mathbf{x}\|_{\Gamma}^2$ denotes the quadratic form:
$\mathbf{x}^{T}\Gamma\mathbf{x}$, $\mathbf{x}\in\mathbb{R}^n$. The notation $|\cdot|$
stands for the modulus of a scalar. The solution of a system of differential equations $\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},t,{\boldsymbol{\theta}},\mathbf{u}), \
\mathbf{x}(t_0)=\mathbf{x}_0$, $\mathbf{u}:\mathbb{R}_+\rightarrow\mathbb{R}^m$,
${\boldsymbol{\theta}}\in\mathbb{R}^d$ for $t\geq t_0$ will be denoted as
$\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},\mathbf{u})$, or simply as $\mathbf{x}(t)$ if it is clear from the context what the values of $\mathbf{x}_0,{\boldsymbol{\theta}}$
are and how the function $\mathbf{u}(t)$ is defined.

Let $\mathbf{u}:\mathbb{R}^n\times\mathbb{R}^d\times\mathbb{R}_+\rightarrow\mathbb{R}^m$ be a function of state $\mathbf{x}$, parameters $\hat{{\boldsymbol{\theta}}}$, and time
$t$. Let in addition both $\mathbf{x}$ and $\hat{{\boldsymbol{\theta}}}$ be functions of $t$. Then in case the arguments of $\mathbf{u}$ are clearly defined by the context, we will simply write $\mathbf{u}(t)$ instead of
$\mathbf{u}(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$.

The (forward complete) system
$\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},t,{\boldsymbol{\theta}},\mathbf{u}(t))$, is said to have an
$L_{p}^m [t_0,T]\mapsto L_{q}^n[t_0,T]$, gain ($T\geq t_0$,
$p,q\in\mathbb{R}_{\geq 1}\cup\{\infty\}$) with respect to its input
$\mathbf{u}(t)$ if and only if $\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},\mathbf{u}(t))\in L_{q}^n [t_0,T]$ for any $\mathbf{u}(t)\in L_{p}^m [t_0,T]$ and there exists a function
$\gamma_{q,p}:\mathbb{R}^n\times\mathbb{R}^d\times\mathbb{R}_+\rightarrow\mathbb{R}_+$
such that the following inequality holds:
$\|\mathbf{x}(t)\|_{q,[t_0,T]}\leq
\gamma_{q,p}(\mathbf{x}_0,{\boldsymbol{\theta}},\|\mathbf{u}(t)\|_{p,[t_0,T]})$. The function $\gamma_{q,p}(\mathbf{x}_0,{\boldsymbol{\theta}},\|\mathbf{u}(t)\|_{p,[t_0,T]})$
is assumed to be non-decreasing in $\|\mathbf{u}(t)\|_{p,[t_0,T]}$, and locally bounded in its arguments.

For notational convenience when dealing with vector fields and partial derivatives we will use the following extended notion of the Lie derivative of a function. Let $\mathbf{x}\in\mathbb{R}^n$ and assume
$\mathbf{x}$ can be partitioned as follows $\mathbf{x}=\mathbf{x}_1\oplus\mathbf{x}_2$,
where $\mathbf{x}_1\in\mathbb{R}^q$, $\mathbf{x}_1=(x_{11},\dots,x_{1q})^T$,
$\mathbf{x}_2\in\mathbb{R}^p$, $\mathbf{x}_2=(x_{21},\dots,x_{2p})^T$, $q+p=n$, and
$\oplus$ denotes the concatenation of two vectors. Define
$\mathbf{f}:\mathbb{R}^{n}\rightarrow\mathbb{R}^n$ such that
$\mathbf{f}(\mathbf{x})=\mathbf{f}_1(\mathbf{x})\oplus\mathbf{f}_2(\mathbf{x})$, where
$\mathbf{f}_1:\mathbb{R}^n\rightarrow\mathbb{R}^q$,
$\mathbf{f}_1(\cdot)=(f_{11}(\cdot),\dots,f_{1q}(\cdot))^T$,
$\mathbf{f}_2:\mathbb{R}^n\rightarrow\mathbb{R}^p$,
$\mathbf{f}_2(\cdot)=(f_{21}(\cdot),\dots,f_{2p}(\cdot))^T$. Then
$L_{\mathbf{f}_i(\mathbf{x})}\psi(\mathbf{x},t)$, $i\in\{1,2\}$ denotes the Lie derivative of the function $\psi(\mathbf{x},t)$ with respect to the vector field $\mathbf{f}_i(\mathbf{x},{\boldsymbol{\theta}})$:
$L_{\mathbf{f}_i(\mathbf{x})}\psi(\mathbf{x},t)=\sum_{j=1}^{\dim{\mathbf{x}_i}}\frac{{\partial}
\psi(\mathbf{x},t) }{{\partial} x_{ij}}f_{ij}(\mathbf{x},{\boldsymbol{\theta}})$.

\section{Introduction}

We consider the problem how to control the behavior of complex dynamical systems composed of interconnected lower-dimensional subsystems. Centralized control of these systems is practically inefficient because of high demands for computational power,
measurements and prohibitive communication cost. On the other hand, standard decentralized solutions often face severe limitations due to the deficiency of information about the interconnected subsystems. In addition, the nature of their interconnections may vary depending on conditions in the environment. In order to address these problems in their most general setup, decentralized adaptive control is needed.

Currently there is a large literature on decentralized adaptive control which contains successful solutions to problems of adaptive stabilization \cite{Gavel_1989,Jain_1997}, tracking
\cite{Ioannou86,Jain_1997,Shi_1992,Passino96}, and output regulation \cite{Jiang_2000,Huang_2003} of linear and nonlinear systems. In most of these cases the problem of decentralized control is solved within the conventional framework of adaptive stabilization/tracking/regulation by a family of linearly parameterized controllers. While these results may be successfully implemented in a large variety of technical and artificial systems, there is room for further improvements, in particular
when the target dynamics of the systems is not stable in the Lyapunov sense but intermittent, meta-stable, or multi-stable
\cite{Arecchi_2004,Raffone_2003,Tsuda_2004} or when the uncertainties are nonlinearly parameterized
\cite{Armstrong_1993,Boskovic_1995,Canudas_1999,Kitching_2000},
and no domination of the uncertainties by feedback is allowed.

In the present article we address these issues at once for a class of nonlinear dynamical systems. Our contribution is that we provide conditions ensuring forward-completeness, boundedness and asymptotic reaching of the goal for a pair of interconnected systems with uncertain coupling and parameters. Our method does not require availability of a Lyapunov function for the desired motions in each subsystem, nor linear parametrization of the controllers. Our results can straightforwardly be extended to interconnection of arbitrarily many (but still, a finite number of)
subsystems. Explicit equations for corresponding decentralized adaptive controllers are also provided.

The paper is organized as follows. In Section 2 we provide a formal statement of the problem, Section 3 contains necessary preliminaries and auxiliary results. In Section 4 we present the main results of our current contribution, and in Section 5 we provide concluding remarks to our approach.

\section{Problem Formulation}

Let us consider two interconnected systems $\mathcal{S}_x$ and
$\mathcal{S}_y$:
\begin{eqnarray}
&\mathcal{S}_x: & \
\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}}_x)+\gamma_y(\mathbf{y},t)+
\mathbf{g}(\mathbf{x})u_x \label{eq:system:s1} \\
&\mathcal{S}_y: & \
\dot{\mathbf{y}}=\mathbf{q}(\mathbf{y},{\boldsymbol{\theta}}_y)+\gamma_x(\mathbf{x},t)+\mathbf{z}(\mathbf{y})u_y\label{eq:system:s2}
\end{eqnarray}
where $\mathbf{x}\in\mathbb{R}^{n_x}$, $\mathbf{y}\in\mathbb{R}^{n_y}$ are the state vectors of systems $\mathcal{S}_x$ and $\mathcal{S}_y$, vectors
${\boldsymbol{\theta}}_x\in\mathbb{R}^{n_{\theta_x}}$,
${\boldsymbol{\theta}}_y\in\mathbb{R}^{n_{\theta_y}}$ are unknown parameters,
functions
$\mathbf{f}:\mathbb{R}^{n_x}\times\mathbb{R}^{n_{\theta_x}}\rightarrow\mathbb{R}^{n_x}$,
$\mathbf{q}:\mathbb{R}^{n_y}\times\mathbb{R}^{n_{\theta_y}}\rightarrow\mathbb{R}^{n_y}$,
$\mathbf{g}:\mathbb{R}^{n_x}\rightarrow\mathbb{R}^{n_x}$,
$\mathbf{z}:\mathbb{R}^{n_y}\rightarrow\mathbb{R}^{n_y}$ are continuous and locally bounded. Functions
$\gamma_y:\mathbb{R}^{n_y}\times\mathbb{R}_+\rightarrow\mathbb{R}^{n_x}$,
$\gamma_x:\mathbb{R}^{n_x}\times\mathbb{R}_+\rightarrow\mathbb{R}^{n_y}$, stand for nonlinear, non-stationary and, in general, unknown couplings between systems $\mathcal{S}_x$ and $\mathcal{S}_y$, and
$u_x\in\mathbb{R}$, $u_y\in\mathbb{R}$ are the control inputs.

In the present paper we are interested in the following problem

\begin{problem}\label{problem:decentralized}\normalfont Let $\psi_x:\mathbb{R}^{n_x}\times\mathbb{R}_+\rightarrow\mathbb{R}$,
$\psi_y:\mathbb{R}^{n_y}\times\mathbb{R}_+\rightarrow\mathbb{R}$ be the goal functions for systems $\mathcal{S}_x$, $\mathcal{S}_y$
respectively. In other words, for some values
$\varepsilon_x\in\mathbb{R}_{+}$, $\varepsilon_y\in\mathbb{R}_+$ and time instant $t^\ast\in\mathbb{R}_+$, inequalities
\begin{equation}\label{eq:goal_functionals}
\|\psi_x(\mathbf{x}(t),t)\|_{\infty,[t^\ast,\infty]}\leq\varepsilon_x, \
\|\psi_y(\mathbf{y}(t),t)\|_{\infty,[t^\ast,\infty]}\leq\varepsilon_y
\end{equation}
specify the desired state of interconnection (\ref{eq:system:s1}),
(\ref{eq:system:s2}). Derive functions $u_x(\mathbf{x},t)$, $u_y(\mathbf{y},t)$
such that for all ${\boldsymbol{\theta}}_x\in\mathbb{R}^{n_{\theta_x}}$,
${\boldsymbol{\theta}}_y\in\mathbb{R}^{n_{\theta_y}}$

1) interconnection (\ref{eq:system:s1}), (\ref{eq:system:s2}) is forward-complete;

2) the trajectories $\mathbf{x}(t)$, $\mathbf{y}(t)$ are bounded;

3) for given values of $\varepsilon_x$, $\varepsilon_y$, some
$t^\ast\in\mathbb{R}_+$ exists such that inequalities
(\ref{eq:goal_functionals}) are satisfied or, possibly, both functions $\psi_x(\mathbf{x}(t),t)$, $\psi_y(\mathbf{y}(t),t)$ converge to zero as $t\rightarrow\infty$.

Function $u_x(\cdot)$ should not depend explicitly on $\mathbf{y}$ and,
symmetrically, function $u_y(\cdot)$ should not depend explicitly on $\mathbf{x}$. The general structure of the desired configuration of the control scheme is provided in Figure~\ref{fig:decentralized:singularity}.
\end{problem}

\begin{figure}
\begin{center}
\includegraphics[width=110pt]{decentralized.eps}
\end{center}
\begin{center}
\caption{General structure of interconnection}\label{fig:decentralized:singularity}
\end{center}
\end{figure}

In the next sections we provide sufficient conditions, ensuring solvability of Problem \ref{problem:decentralized} and we also explicitly derive functions $u_x(\mathbf{x},t)$ and $u_y(\mathbf{y},t)$ which satisfy requirements 1) -- 3) of Problem
\ref{problem:decentralized}. We start with the introduction of a new class of adaptive control schemes and continue by providing the input-output characterizations of the controlled systems.
These results are given in Section \ref{sec:preliminary}. Then,
using these characterizations, in Section \ref{sec:main} we provide the main results of our study.

\section{Assumptions and properties of the decoupled systems}\label{sec:preliminary}

Let the following system be given:
\begin{equation}\label{system1}
\begin{split}
\dot{\mathbf{x}}_1=&\mathbf{f}_1(\mathbf{x})+\mathbf{g}_1(\mathbf{x})u, \\
\dot{\mathbf{x}}_2=&\mathbf{f}_2(\mathbf{x},{\boldsymbol{\theta}})+\mathbf{g}_2(\mathbf{x})u,
\end{split}
\end{equation}
where
\[
\mathbf{x}_1=(x_{11},\dots,x_{1 q})^T\in \mathbb{R}^q; \
\mathbf{x}_2=(x_{21},\dots,x_{2 p})^T\in \mathbb{R}^p;
\]
\[
\mathbf{x}=(x_{11},\dots,x_{1 q},x_{21},\dots,x_{2 p})^T\in \mathbb{R}^{n}
\]
${\boldsymbol{\theta}}\in \Omega_\theta\subset \mathbb{R}^d$ is a vector of unknown parameters, and $\Omega_\theta$ is a closed bounded subset of
$\mathbb{R}^d$; $u\in\mathbb{R}$ is the control input, and functions
$\mathbf{f}_1:\mathbb{R}^{n}\rightarrow \mathbb{R}^{q}$,
$\mathbf{f}_2:\mathbb{R}^{n}\times\mathbb{R}^d\rightarrow \mathbb{R}^{p}$,
$\mathbf{g}_1:\mathbb{R}^{n}\rightarrow \mathbb{R}^q$,
$\mathbf{g}_2:\mathbb{R}^{n}\rightarrow\mathbb{R}^{p}$ are continuous and locally bounded. The vector $\mathbf{x}\in\mathbb{R}^n$ is the state vector, and vectors $\mathbf{x}_1$, $\mathbf{x}_2$ are referred to as {\it uncertainty-independent} and {\it uncertainty-dependent} partition of $\mathbf{x}$, respectively. For the sake of compactness we will also use the following description of (\ref{system1}):
\begin{equation}\label{system}
\dot{\mathbf{x}}=\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})+\mathbf{g}(\mathbf{x})u,
\end{equation}
where
\[
\mathbf{g}(\mathbf{x})=(g_{11}(\mathbf{x}),\dots,g_{1q}(\mathbf{x}),g_{21}(\mathbf{x}),\dots,g_{2 p}(\mathbf{x}))^{T},
\]
\[
\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})=(f_{11}(\mathbf{x}),\dots,f_{1q}(\mathbf{x}),f_{21}(\mathbf{x},{\boldsymbol{\theta}}),\dots,f_{2 p}(\mathbf{x},{\boldsymbol{\theta}}))^{T}.
\]

As a measure of closeness of trajectories $\mathbf{x}(t)$ to the desired state we introduce the error or goal function $\psi:\mathbb{R}^n\times
\mathbb{R}_+\rightarrow \mathbb{R}, \ \psi\in \mathcal{C}^1$.
We suppose also that the chosen function $\psi(\mathbf{x},t)$
satisfies the following:
\begin{assume}[Target operator]\label{assume:psi} For the given function $\psi(\mathbf{x},t)\in \mathcal{C}^1$ the following property holds:
\begin{equation}\label{eq:assume_psi}
\|\mathbf{x}(t)\|_{\infty,[t_0,T]}\leq
\tilde{\gamma}\left(\mathbf{x}_0,{\boldsymbol{\theta}},\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,T]}\right)
\end{equation}
where
$\tilde{\gamma}\left(\mathbf{x}_0,{\boldsymbol{\theta}},\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,T]}\right)$
is a locally bounded and non-negative function of its arguments.
\end{assume}
Assumption \ref{assume:psi} can be interpreted as a sort of {\it unboundedness observability} property \cite{Jiang_1994} of system
(\ref{system1}) with respect to the ``output'' function
$\psi(\mathbf{x},t)$. It can also be viewed as a {\it bounded input -
bounded state} assumption for system (\ref{system1}) along the constraint
$\psi(\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},u(\mathbf{x}(t),t)),t)=\upsilon(t)$,
where the signal $\upsilon(t)$ serves as a new input. If, however,
boundedness of the state is not explicitly required (i.e. it is guaranteed by additional control or follows from the physical properties of the system itself), Assumption \ref{assume:psi} can be removed from the statements of our results.

Let us specify a class of control inputs $u$ which can ensure boundedness of $\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},u)$ for every
${\boldsymbol{\theta}}\in \Omega_\theta$ and $\mathbf{x}_0\in\mathbb{R}^n$. According to
(\ref{eq:assume_psi}), boundedness of
$\mathbf{x}(t,\mathbf{x}_0,t_0,{\boldsymbol{\theta}},u)$ is ensured if we find a control input $u$ such that $\psi(\mathbf{x}(t),t)\in L_\infty^1[t_0,\infty]$.
For this objective consider the dynamics of system (\ref{system})
with respect to $\psi(\mathbf{x},t)$:
\begin{equation}\label{dpsi}
\dot{\psi}=L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})}\psi(\mathbf{x},t)+L_{\mathbf{g}(\mathbf{x})}\psi(\mathbf{x},t)u+\frac{{\partial}
\psi(\mathbf{x},t)}{{\partial} t},
\end{equation}
Assuming that the inverse
$\left(L_{\mathbf{g}(\mathbf{x})}\psi(\mathbf{x},t)\right)^{-1}$ exists everywhere,
we may choose the control input $u$ in the following class of functions:
\begin{equation}\label{control}
\begin{split}
u(\mathbf{x},\hat{\boldsymbol{\theta}},{\boldsymbol{\omega}},t)&=\frac{1}{L_{\mathbf{g}(\mathbf{x})}\psi(\mathbf{x},t)}\left(-L_{\mathbf{f}(\mathbf{x},\hat{{\boldsymbol{\theta}}})}\psi(\mathbf{x},t)-\varphi(\psi,{\boldsymbol{\omega}},t)-\frac{{\partial}\psi(\mathbf{x},t)}{{\partial} t}\right) \\
& \ \varphi: \ \mathbb{R}\times\mathbb{R}^w\times\mathbb{R}_+\rightarrow\mathbb{R}
\end{split}
\end{equation}
where ${\boldsymbol{\omega}}\in\Omega_\omega\subset\mathbb{R}^w$ is a vector of
{\it known} parameters of the function
$\varphi(\psi,{\boldsymbol{\omega}},t)$. Denoting
$L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})}\psi(\mathbf{x},t)=f(\mathbf{x},{\boldsymbol{\theta}},t)$ and taking into account (\ref{control}) we may rewrite equation
(\ref{dpsi}) in the following manner:
\begin{equation}\label{error_model}
{\dot\psi}=f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)
\end{equation}

For the purpose of the present article, instead of
(\ref{error_model}) it is worthwhile to consider the extended equation:
\begin{equation}\label{error_model_d}
{\dot\psi}=f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)+\varepsilon(t),
\end{equation}
where, if not stated otherwise, the function
$\varepsilon:\mathbb{R}_+\rightarrow\mathbb{R}$, $\varepsilon\in L_{2}^1
[t_0,\infty]\cap C^0$. One of the immediate advantages of equation
(\ref{error_model_d}) in comparison with (\ref{error_model}) is that it allows us to take the presence of coupling between interconnected systems into consideration.

Let us now specify the desired properties of the function
$\varphi(\psi,{\boldsymbol{\omega}},t)$ in (\ref{control}),
(\ref{error_model_d}). The majority of known algorithms for parameter estimation and adaptive control
\cite{Kokotovich95,Fradkov99,Narendra89,Sastry89} assume global
(Lyapunov) stability of system
(\ref{error_model_d}) for ${\boldsymbol{\theta}}\equiv\hat{{\boldsymbol{\theta}}}$. In our study, however, we refrain from this standard, restrictive requirement. Instead we propose that finite energy of the signal
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$, defined for example by its $L_{2}^1[t_0,\infty]$ norm with respect to the variable $t$, results in finite deviation from the target set given by the equality $\psi(\mathbf{x},t)=0$. Formally this requirement is introduced in Assumption \ref{assume:gain}:
\begin{assume}[Target dynamics operator]\label{assume:gain} Consider the following system:
\begin{equation}\label{eq:target_dynamics}
{\dot\psi}=-\varphi(\psi,{\boldsymbol{\omega}},t)+\zeta(t),
\end{equation}
where $\zeta:\mathbb{R}_+\rightarrow\mathbb{R}$ and
$\varphi(\psi,{\boldsymbol{\omega}},t)$ is defined in (\ref{error_model_d}).
Then for every ${\boldsymbol{\omega}}\in\Omega_\omega$ system
(\ref{eq:target_dynamics}) has $L_{2}^1 [t_0,\infty]\mapsto L_\infty^1[t_0,\infty]$ gain with respect to input $\zeta(t)$. In other words, there exists a function $\gamma_{\infty,2}$ such that
\begin{equation}\label{eq:gain_psi_L2}
\|\psi(t)\|_{\infty,[t_0,T]}\leq
\gamma_{\infty,2}(\psi_0,{\boldsymbol{\omega}},\|\zeta(t)\|_{2,[t_0,T]}), \ \
\forall \ \zeta(t)\in L_{2}^1[t_0,T]
\end{equation}
\end{assume}
In contrast to conventional approaches, Assumption
\ref{assume:gain} does not require global {\it asymptotic stability} of the origin of the unperturbed (i.e., for $\zeta(t)=0$)
system (\ref{eq:target_dynamics}). When the stability of the target dynamics ${\dot\psi}=-\varphi(\psi,{\boldsymbol{\omega}},t)$ is known a-priori, one of the benefits of Assumption \ref{assume:gain} is that there is no need to know a {\it particular Lyapunov function}
of the unperturbed system.

So far we have introduced basic assumptions on system
(\ref{system1}) and the class of feedback considered in this article. Let us now specify the class of functions
$f(\mathbf{x},{\boldsymbol{\theta}},t)$ in (\ref{error_model_d}). Since general parametrization of function $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is methodologically difficult to deal with, but solutions provided for nonlinearities with convenient linear re-parametrization often yield physically implausible models and large number of unknown parameters, we have opted for a new class of parameterizations.
As a candidate for such a parametrization we suggest nonlinear functions that satisfy the following assumption:
\begin{assume}[Monotonicity and Growth Rate in Parameters]\label{assume:alpha}For the given function
$f(\mathbf{x},{\boldsymbol{\theta}},t)$ in (\ref{error_model_d}) there exists function $\boldsymbol{\alpha}(\mathbf{x},t): \mathbb{R}^{n}\times \mathbb{R}_+\rightarrow
\mathbb{R}^d, \ \boldsymbol{\alpha}(\mathbf{x},t)\in \mathcal{C}^1$ and positive constant $D>0$ such that
\begin{equation}\label{eq:assume_alpha}
(f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t))(\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}}))\geq0
\end{equation}
\begin{equation}\label{eq:assume_gamma}
|f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|\leq D
|\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})|
\end{equation}
\end{assume}
This set of conditions naturally extends from systems that are linear in parameters to those with nonlinear parametrization.
Examples and models of physical and artificial systems which satisfy Assumption \ref{assume:alpha} (at least for bounded
${\boldsymbol{\theta}},\hat{{\boldsymbol{\theta}}}\in \Omega_\theta$) can be found in the following references
\cite{Armstrong_1993,Boskovic_1995,Canudas_1999,Abbott_2001,Kitching_2000}.
Assumption \ref{assume:alpha} bounds the growth rate of the difference $|f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)|$ by the functional
$D|\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})|$.
In addition, it might also be useful to have an estimate of
$|f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)|$ from below, as specified in Assumption \ref{assume:alpha_upper}:
\begin{assume}\label{assume:alpha_upper} For the given function
$f(\mathbf{x},{\boldsymbol{\theta}},t)$ in (\ref{error_model_d}) and function
$\boldsymbol{\alpha}(\mathbf{x},t)$, satisfying Assumption \ref{assume:alpha},
there exists a positive constant $D_1>0$ such that
\begin{equation}\label{eq:assume_alpha_upper}
|f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|\geq D_1
|\boldsymbol{\alpha}(\mathbf{x},t)^{T}(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})|
\end{equation}
\end{assume}

\noindent In problems of adaptation, parameter estimation and optimization, effectiveness of the algorithms often depends on how
``good'' the nonlinearity $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is, and how predictable the system's behavior is. As a measure of goodness and predictability, such substitutes as smoothness and boundedness are usually considered. In our study, we distinguish several such specific properties of the functions $f(\mathbf{x},{\boldsymbol{\theta}},t)$
and $\varphi(\psi,{\boldsymbol{\omega}},t)$. These properties are provided below.

\begin{hyp}\label{hyp:locally_bound_uniform_f} The function $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is locally bounded with respect to $\mathbf{x}$, ${{\boldsymbol{\theta}}}$ uniformly in $t$.
\end{hyp}

\begin{hyp}\label{hyp:locally_bound_uniform_df} The function $f(\mathbf{x},{\boldsymbol{\theta}},t)\in \mathcal{C}^1$, and $ {\partial}
{f(\mathbf{x},{\boldsymbol{\theta}},t)}/{{\partial} t}$ is locally bounded with respect to
$\mathbf{x}$, ${{\boldsymbol{\theta}}}$ uniformly in $t$.
\end{hyp}

\begin{hyp}\label{hyp:locally_bound_uniform_phi} The function $\varphi(\psi,{\boldsymbol{\omega}},t)$ is locally bounded in $\psi$,
${\boldsymbol{\omega}}$ uniformly in $t$.
\end{hyp}

Let us show that under an additional structural requirement, which relates properties of the function $\boldsymbol{\alpha}(\mathbf{x},t)$ and vector-field
$\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})=\mathbf{f}_1(\mathbf{x},{\boldsymbol{\theta}})\oplus\mathbf{f}_2(\mathbf{x},{\boldsymbol{\theta}})$
in (\ref{system1}), (\ref{system}), there exist adaptive algorithms ensuring that the following desired property holds:
\begin{equation}\label{eq:desired_prop}
\mathbf{x}(t)\in L_\infty^n[t_0,\infty]; \
f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)\in L_{2}^1[t_0,\infty]
\end{equation}

Consider the following adaptation algorithms:
\begin{equation}\label{fin_forms_ours_tr1}
\begin{split}
\hat{{\boldsymbol{\theta}}}(\mathbf{x},t)&=\Gamma(\hat{{\boldsymbol{\theta}}}_P(\mathbf{x},t)+\hat{{\boldsymbol{\theta}}}_I(t));
\ \Gamma\in\mathbb{R}^{d\times d}, \ \Gamma>0
\\ \hat{{\boldsymbol{\theta}}}_P(\mathbf{x},t)&=
\psi(\mathbf{x},t)\boldsymbol{\alpha}(\mathbf{x},t)-\Psi(\mathbf{x},t) \\
\dot{\hat{{\boldsymbol{\theta}}}}_I&=\varphi(\psi(\mathbf{x},t),{\boldsymbol{\omega}},t)\boldsymbol{\alpha}(\mathbf{x},t)+\mathcal{R}(\mathbf{x},\hat{{\boldsymbol{\theta}}},u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t),t),
\end{split}
\end{equation}
where the function
$\mathcal{R}(\mathbf{x},\hat{{\boldsymbol{\theta}}},u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t),t):\mathbb{R}^n\times\mathbb{R}^d\times\mathbb{R}\times\mathbb{R}_+\rightarrow\mathbb{R}^d$
in (\ref{fin_forms_ours_tr1}) is given as follows:
\begin{equation}\label{fin_forms_ours_tr11}
\begin{split}
&\mathcal{R}(\mathbf{x},\hat{{\boldsymbol{\theta}}},u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t),t)={{\partial}
\Psi(\mathbf{x},t)}/{{\partial} t}-\psi(\mathbf{x},t)({{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}/{{\partial} t}+L_{\mathbf{f}_1}\boldsymbol{\alpha}(\mathbf{x},t))\\
& + L_{\mathbf{f}_1}
\Psi(\mathbf{x},t)-(\psi(\mathbf{x},t)L_{\mathbf{g}_1}\boldsymbol{\alpha}(\mathbf{x},t)-L_{\mathbf{g}_1}
\Psi(\mathbf{x},t))u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)
\end{split}
\end{equation}
and function
$\Psi(\mathbf{x},t):\mathbb{R}^{n}\times\mathbb{R}_+\rightarrow\mathbb{R}^d$,
$\Psi(\mathbf{x},t)\in \mathcal{C}^1$ satisfies Assumption
\ref{assume:explicit_realizability}.
\begin{assume}\label{assume:explicit_realizability} There exists a function $\Psi(\mathbf{x},t)$ such that
\begin{equation}\label{eq:assume_explicit}
\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}-\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_2}=0
\end{equation}
\end{assume}
Additional restrictions imposed by this assumption will be discussed in some detail after we summarize the properties of system (\ref{system1}), (\ref{control}),
(\ref{fin_forms_ours_tr1}), (\ref{fin_forms_ours_tr11}) in the following theorem.

\begin{theorem}[Properties of the decoupled systems]\label{stability_theorem}
Let system (\ref{system1}), (\ref{error_model_d}),
(\ref{fin_forms_ours_tr1}), (\ref{fin_forms_ours_tr11}) be given and Assumptions \ref{assume:alpha}, \ref{assume:alpha_upper},
\ref{assume:explicit_realizability} be satisfied. Then the following properties hold

P1) Let for the given initial conditions $\mathbf{x}(t_0)$,
$\hat{{\boldsymbol{\theta}}}_I(t_0)$ and parameters vector ${\boldsymbol{\theta}}$,
interval $[t_0,T^\ast]$ be the (maximal) time-interval of existence of solutions of the closed loop system (\ref{system1}),
(\ref{error_model_d}), (\ref{fin_forms_ours_tr1}),
(\ref{fin_forms_ours_tr11}). Then
\begin{equation}\label{eq:f_diff_L2}
\|f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)\|_{2,[t_0,T^\ast]}\leq D_f({\boldsymbol{\theta}},t_0,\Gamma,\|\varepsilon(t)\|_{2,[t_0,T^\ast]});
\end{equation}
\[
D_f({\boldsymbol{\theta}},t_0,\Gamma,\|\varepsilon(t)\|_{2,[t_0,T^\ast]})=\left(\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}\right)^{0.5}
+ \frac{D}{D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}
\]
\[
\|{\boldsymbol{\theta}}-\hat{\boldsymbol{\theta}}(t)\|^{2}_{\Gamma^{-1}}\leq
\|\hat{{\boldsymbol{\theta}}}(t_0)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}+\frac{D}{2 D_1^2}\|\varepsilon(t)\|^{2}_{2,[t_0,T^\ast]}
\]

\noindent In addition, if Assumptions \ref{assume:psi} and
\ref{assume:gain} are satisfied then

P2) $\psi(\mathbf{x}(t),t)\in L_\infty^1[t_0,\infty]$, $\mathbf{x}(t)\in L_{\infty}^n[t_0,\infty]$ and
\begin{equation}\label{eq:psi_gain}
\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,\infty]}\leq
\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},\mathcal{D}\right)
\end{equation}
\[
\mathcal{D}=D_f({\boldsymbol{\theta}},t_0,\Gamma,\|\varepsilon(t)\|_{2,[t_0,\infty]})+\|\varepsilon(t)\|_{2,[t_0,\infty]}
\]

P3) if properties H\ref{hyp:locally_bound_uniform_f},
H\ref{hyp:locally_bound_uniform_phi} hold, and system
(\ref{eq:target_dynamics}) has $L_{2}^1 [t_0,\infty]\mapsto L_{p}^1 [t_0,\infty]$, $p>1$ gain with respect to input $\zeta(t)$
and output $\psi$ then
\begin{equation}\label{eq:convergence_psi_theorem}
\varepsilon(t)\in L_{2}^1 [t_0,\infty]\cap L_{\infty}^1[t_0,\infty]\Rightarrow
\lim_{t\rightarrow\infty}\psi(\mathbf{x}(t),t)=0
\end{equation}

If, in addition, property H\ref{hyp:locally_bound_uniform_df}
holds, and the functions $\boldsymbol{\alpha}(\mathbf{x},t)$, ${\partial}
\psi(\mathbf{x},t)/{\partial} t$ are locally bounded with respect to $\mathbf{x}$
uniformly in $t$, then

P4) the following holds
\begin{equation}\label{eq:convergence_f_theorem}
\lim_{t\rightarrow\infty}f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)=0
\end{equation}

\end{theorem}
The proof of Theorem \ref{stability_theorem} and subsequent results are given in Section 6.

Let us briefly comment on Assumption
\ref{assume:explicit_realizability}.
Let $\boldsymbol{\alpha}(\mathbf{x},t)\in
\mathcal{C}^2$,
$\boldsymbol{\alpha}(\mathbf{x},t)=\mathrm{col}(\alpha_1(\mathbf{x},t),\dots,\alpha_d(\mathbf{x},t))$,
then necessary and sufficient conditions for existence of the function $\Psi(\mathbf{x},t)$ follow from the Poincar\'e lemma:
\begin{equation}\label{eq:poincare}
\frac{{\partial}}{{\partial} \mathbf{x}_2}\left(\psi(\mathbf{x},t)\frac{{\partial}
\alpha_i(\mathbf{x},t)}{{\partial}
\mathbf{x}_2}
\right)=\left(\frac{{\partial}}{{\partial}
\mathbf{x}_2}\left(\psi(\mathbf{x},t)\frac{{\partial} \alpha_i(\mathbf{x},t)}{{\partial}
\mathbf{x}_2}
\right)
\right)^T
\end{equation}
This relation, in the form of conditions of existence of the solutions for function $\Psi(\mathbf{x},t)$ in
(\ref{eq:assume_explicit}), takes into account structural properties of system (\ref{system1}), (\ref{error_model_d}).
Indeed,
consider partial derivatives ${\partial} \alpha_i(\mathbf{x},t)/{\partial} \mathbf{x}_2$,
${\partial} \psi(\mathbf{x},t)/{\partial} \mathbf{x}_2$ with respect to the vector
$\mathbf{x}_2=(x_{21},\dots,x_{2p})^T$. Let
\begin{equation}\label{eq:single_dim}
\begin{split}
\frac{{\partial} \psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}=\left(\begin{array}{cccccccc}
0& 0
& \cdots & 0& \ast & 0&\cdots&0
\end{array}\right), \
\frac{{\partial}
\alpha_i(\mathbf{x},t)}{{\partial}\mathbf{x}_2}=\left(\begin{array}{cccccccc}
0 & 0
& \cdots & 0&
\ast &
0&\cdots&0
\end{array}\right)
\end{split}
\end{equation}
where the symbol $\ast$ denotes a function of $\mathbf{x}$ and $t$. Then condition (\ref{eq:single_dim}) guarantees that equality
(\ref{eq:poincare}) (and, subsequently, Assumption
\ref{assume:explicit_realizability}) holds. In case ${\partial}
\alpha(\mathbf{x}_1\oplus \mathbf{x}_2,t)/{\partial} \mathbf{x}_2=0$, Assumption
\ref{assume:explicit_realizability} holds for arbitrary
$\psi(\mathbf{x},t)\in \mathcal{C}^1$. If $\psi(\mathbf{x},t)$,
$\boldsymbol{\alpha}(\mathbf{x},t)$ depend on a single component of $\mathbf{x}_2$, for instance $x_{2k}, \ k\in\{1,\dots,p\}$, then conditions
(\ref{eq:single_dim}) hold and the function $\Psi(\mathbf{x},t)$ can be derived explicitly by integration
\begin{equation}\label{eq:single_dim_int}
\Psi(\mathbf{x},t)=\int\psi(\mathbf{x},t)\frac{{\partial} \boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} x_{2k}}d x_{2k}
\end{equation}
In all other cases, existence of the required function
$\Psi(\mathbf{x},t)$ follows from (\ref{eq:poincare}).

In the general case, when $\dim\{\mathbf{x}_2\}>1$, the problems of finding a function $\Psi(\mathbf{x},t)$ satisfying condition
(\ref{eq:assume_explicit}) can be avoided (or converted into one with already known solutions such as (\ref{eq:poincare}),
(\ref{eq:single_dim_int})) by the {\it embedding} technique proposed in \cite{ECC_2003}. The main idea of the method is to introduce an auxiliary system that is forward-complete with respect to input $\mathbf{x}(t)$
\begin{equation}\label{eq:embed}
\begin{split}
\dot{{\boldsymbol{\xi}}}&=\mathbf{f}_{\boldsymbol{\xi}}(\mathbf{x},{\boldsymbol{\xi}},t), \ {\boldsymbol{\xi}}\in\mathbb{R}^z \\
\mathbf{h}_\xi&=\mathbf{h}_\xi({\boldsymbol{\xi}},t), \
\mathbb{R}^z\times\mathbb{R}_+\rightarrow\mathbb{R}^h
\end{split}
\end{equation}
such that
\begin{equation}\label{eq:embed_L2}
\|f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}_1(t)\oplus\mathbf{h}_\xi(t)\oplus\mathbf{x}_2'(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,T]}
\leq C_\xi\in\mathbb{R}_+
\end{equation}
for all $T\geq t_0$, and $\dim\{{\mathbf{h}_\xi}\}+\dim{\{\mathbf{x}_2'\}}=p$.
Then (\ref{error_model_d}) can be rewritten as follows:
\begin{equation}\label{error_model_d1}
{\dot\psi}=f(\mathbf{x}_1\oplus\mathbf{h}_\xi\oplus\mathbf{x}_2',{\boldsymbol{\theta}},t)-f(\mathbf{x}_1\oplus\mathbf{h}_\xi\oplus\mathbf{x}_2',\hat{\boldsymbol{\theta}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)+\varepsilon_\xi(t),
\end{equation}
where $\varepsilon_\xi(t)\in L_{2}^1 [t_0,\infty]$, and
$\dim\{\mathbf{x}_2'\}=p-h<p$. In principle, the dimension of $\mathbf{x}_2'$
could be reduced to $1$ or $0$. As soon as this is ensured,
Assumption \ref{assume:explicit_realizability} will be satisfied and the results of Theorem \ref{stability_theorem} follow.
Sufficient conditions ensuring the existence of such an embedding in the general case are provided in \cite{ECC_2003}. For systems in which the parametric uncertainty can be reduced to vector fields with lower-triangular structure the embedding is given in
\cite{ALCOSP_2004}.

\section{Main Results}\label{sec:main}

Without loss of generality let us rewrite interconnection
(\ref{eq:system:s1}), (\ref{eq:system:s2}) as follows:
\begin{equation}\label{eq:system:s11}
\begin{split}
\dot{\mathbf{x}}_1&=\mathbf{f}_1(\mathbf{x})+\mathbf{g}_1(\mathbf{x})u_x\\
\dot{\mathbf{x}}_2 &=\mathbf{f}_2(\mathbf{x},{\boldsymbol{\theta}}_x)+\gamma_y(\mathbf{y},t)+
\mathbf{g}_2(\mathbf{x})u_x
\end{split}
\end{equation}

\begin{equation}\label{eq:system:s21}
\begin{split}
\dot{\mathbf{y}}_1&=\mathbf{q}_1(\mathbf{y})+\mathbf{z}_1(\mathbf{y})u_y\\
\dot{\mathbf{y}}_2&=\mathbf{q}_2(\mathbf{y},{\boldsymbol{\theta}}_y)+\gamma_x(\mathbf{x},t)+\mathbf{z}_2(\mathbf{y})u_y
\end{split}
\end{equation}

Let us now consider the following control functions
\begin{equation}\label{control_s1}
\begin{split}
u_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,{\boldsymbol{\omega}}_x,t)&=(L_{\mathbf{g}(\mathbf{x})}\psi_x(\mathbf{x},t))^{-1}\left(-L_{\mathbf{f}(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x)}\psi_x(\mathbf{x},t)-\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)\right.\\
& \left.-\frac{{\partial}\psi_x(\mathbf{x},t)}{{\partial} t}\right), \ \ \varphi_x: \
\mathbb{R}\times\mathbb{R}^w\times\mathbb{R}_+\rightarrow\mathbb{R}
\end{split}
\end{equation}
\begin{equation}\label{control_s2}
\begin{split}
u_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,{\boldsymbol{\omega}}_y,t)&=(L_{\mathbf{z}(\mathbf{y})}\psi_y(\mathbf{y},t))^{-1}\left(-L_{\mathbf{q}(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y)}\psi_y(\mathbf{y},t)-\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)\right.\\
&\left.-\frac{{\partial}\psi_y(\mathbf{y},t)}{{\partial} t}\right), \ \ \varphi_y: \
\mathbb{R}\times\mathbb{R}^w\times\mathbb{R}_+\rightarrow\mathbb{R}
\end{split}
\end{equation}
These functions transform the original equations
(\ref{eq:system:s11}), (\ref{eq:system:s21}) into the following form
\begin{equation}\label{eq:error_coupled}
\begin{split}
{\dot\psi}_x&=-\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)+f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)-f_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,t)+h_y(\mathbf{x},\mathbf{y},t)\\
{\dot\psi}_y&=-\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)+f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)-f_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,t)+h_x(\mathbf{x},\mathbf{y},t),
\end{split}
\end{equation}
where
\[
h_x(\mathbf{x},\mathbf{y},t)=L_{\gamma_y(\mathbf{y},t)}\psi_x(\mathbf{x},t), \
h_y(\mathbf{x},\mathbf{y},t)=L_{\gamma_x(\mathbf{x},t)}\psi_y(\mathbf{y},t)
\]
\[
f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)=L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}}_x)}\psi_x(\mathbf{x},t), \
f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)=L_{\mathbf{q}(\mathbf{y},{\boldsymbol{\theta}}_y)}\psi_y(\mathbf{y},t)
\]

Consider the following adaptation algorithms
\begin{equation}\label{fin_forms_ours_tr1x}
\begin{split}
\hat{{\boldsymbol{\theta}}}_x(\mathbf{x},t)&=\Gamma_x(\hat{{\boldsymbol{\theta}}}_{P,x}(\mathbf{x},t)+\hat{{\boldsymbol{\theta}}}_{I,x}(t));
\ \Gamma_x\in\mathbb{R}^{d\times d}, \ \Gamma_x>0
\\ \hat{{\boldsymbol{\theta}}}_{P,x}(\mathbf{x},t)&=
\psi_x(\mathbf{x},t)\boldsymbol{\alpha}_x(\mathbf{x},t)-\Psi_x(\mathbf{x},t) \\
\dot{\hat{{\boldsymbol{\theta}}}}_{I,x}&=\varphi_x(\psi_x(\mathbf{x},t),{\boldsymbol{\omega}}_x,t)\boldsymbol{\alpha}_x(\mathbf{x},t)+\mathcal{R}_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,u_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,t),t),
\end{split}
\end{equation}

\begin{equation}\label{fin_forms_ours_tr1y}
\begin{split}
\hat{{\boldsymbol{\theta}}}_y(\mathbf{y},t)&=\Gamma_y(\hat{{\boldsymbol{\theta}}}_{P,y}(\mathbf{y},t)+\hat{{\boldsymbol{\theta}}}_{I,y}(t));
\ \Gamma_y\in\mathbb{R}^{d\times d}, \ \Gamma_y>0
\\ \hat{{\boldsymbol{\theta}}}_{P,y}(\mathbf{y},t)&=
\psi_y(\mathbf{y},t)\boldsymbol{\alpha}_y(\mathbf{y},t)-\Psi_y(\mathbf{y},t) \\
\dot{\hat{{\boldsymbol{\theta}}}}_{I,y}&=\varphi_y(\psi_y(\mathbf{y},t),{\boldsymbol{\omega}}_y,t)\boldsymbol{\alpha}_y(\mathbf{y},t)+\mathcal{R}_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,u_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,t),t),
\end{split}
\end{equation}
where $\mathcal{R}_x(\cdot)$, $\mathcal{R}_y(\cdot)$ are defined as in (\ref{fin_forms_ours_tr11}), and the functions
$\Psi_x(\cdot)$, $\Psi_y(\cdot)$ will be specified later. Now we are ready to formulate the following result

\begin{theorem}[Properties of the interconnected systems]\label{theorem:interconnection} Let systems (\ref{eq:system:s11}), (\ref{eq:system:s21}) be given. Furthermore, suppose that the following conditions hold:

1) The functions $\psi_x(\mathbf{x},t)$, $\psi_y(\mathbf{y},t)$ satisfy Assumption \ref{assume:psi} for systems (\ref{eq:system:s11}),
(\ref{eq:system:s21}) respectively;

2) The systems
\begin{equation}\label{eq:target_dynamics_connected}
\dot{\psi}_x=-\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)+\zeta_x(t), \ \
\dot{\psi}_y=-\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)+\zeta_y(t)
\end{equation}
satisfy Assumption \ref{assume:gain} with corresponding mappings
\[
\gamma_{x_{\infty,2}}(\psi_{x_0},{\boldsymbol{\omega}}_x,\|\zeta_x(t)\|_{2,[t_0,T]}),
\ \
\gamma_{y_{\infty,2}}(\psi_{y_0},{\boldsymbol{\omega}}_y,\|\zeta_y(t)\|_{2,[t_0,T]}),
\]

3) The systems (\ref{eq:target_dynamics_connected}) have
$L_2^1[t_0,\infty]\mapsto L_2^1[t_0,\infty]$ gains, that is
\begin{equation}\label{eq:L_2_2_gains}
\begin{split}
\|\psi_x(\mathbf{x}(t),t)\|_{2,[t_0,T]}&\leq C_{\gamma_x}+\gamma_{x_{2,2}}(\|\zeta_x(t)\|_{2,[t_0,T]}),\\
\|\psi_y(\mathbf{y}(t),t)\|_{2,[t_0,T]}&\leq C_{\gamma_y}+\gamma_{y_{2,2}}(\|\zeta_y(t)\|_{2,[t_0,T]}),\\
C_{\gamma_x}, \ C_{\gamma_y}\in\mathbb{R}_+,& \ \gamma_{x_{2,2}}, \
\gamma_{y_{2,2}}\in\mathcal{K}_\infty
\end{split}
\end{equation}

4) The functions $f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)$,
$f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)$ satisfy Assumptions \ref{assume:alpha},
\ref{assume:alpha_upper} with corresponding constants $D_x$,
$D_{x,1}$, $D_y$, $D_{y,1}$ and functions $\boldsymbol{\alpha}_x(\mathbf{x},t)$,
$\boldsymbol{\alpha}_y(\mathbf{y},t)$;

5) The functions $h_x(\mathbf{x},\mathbf{y},t)$, $h_y(\mathbf{x},\mathbf{y},t)$ satisfy the following inequalities:
\begin{equation}\label{eq:disturbance_gain}
\|h_x(\mathbf{x},\mathbf{y},t)\|\leq \beta_x \|\psi_x(\mathbf{x},t)\|, \
\|h_y(\mathbf{x},\mathbf{y},t)\|\leq \beta_y \|\psi_y(\mathbf{y},t)\|, \ \beta_x,
\beta_y\in \mathbb{R}_+
\end{equation}

Finally, let the functions $\Psi_x(\mathbf{x},t)$, $\Psi_y(\mathbf{y},t)$ in
(\ref{fin_forms_ours_tr1x}), (\ref{fin_forms_ours_tr1y}) satisfy Assumption \ref{assume:explicit_realizability}
for systems (\ref{eq:system:s11}), (\ref{eq:system:s21})
respectively, and there exist functions $\rho_1(\cdot), \
\rho_2(\cdot), \ \rho_3(\cdot)>Id(\cdot)\in\mathcal{K}_\infty$ and constant $\bar{\Delta}\in\mathbb{R}_+$ such that the following inequality holds:
\begin{equation}\label{eq:small_gain_adapt}
\beta_y\circ\gamma_{y_{2,2}}\circ\rho_1\circ\left(\frac{D_y}{D_{y,1}}+1\right)\circ\rho_3\circ
\beta_x\circ
\gamma_{x_{2,2}}\circ\rho_2\circ\left(\frac{D_x}{D_{x,1}}+1\right)(\Delta)<
\Delta
\end{equation}
for all $\Delta\geq \bar{\Delta}$. Then

C1) The interconnection (\ref{eq:system:s11}),
(\ref{eq:system:s21}) with controls (\ref{control_s1}),
(\ref{control_s2}) is forward-complete and trajectories $\mathbf{x}(t)$,
$\mathbf{y}(t)$ are bounded

Furthermore,

C2) if properties H\ref{hyp:locally_bound_uniform_f},
H\ref{hyp:locally_bound_uniform_phi} hold for
$f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)$, $f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)$,
$h_x(\mathbf{x},\mathbf{y},t)$, $h_y(\mathbf{x},\mathbf{y},t)$, and also functions
$\varphi_x(\psi_x,{\boldsymbol{\omega}}_x,t)$,
$\varphi_y(\psi_y,{\boldsymbol{\omega}}_y,t)$, then
\begin{equation}\label{eq:convergence_psi_xy}
\lim_{t\rightarrow\infty}\psi_x(\mathbf{x}(t),t)=0, \
\lim_{t\rightarrow\infty}\psi_y(\mathbf{y}(t),t)=0
\end{equation}

Moreover,

C3) if property H\ref{hyp:locally_bound_uniform_df} holds for
$f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)$, $f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)$, and the functions
\[
\boldsymbol{\alpha}_x(\mathbf{x},t), \ {\partial} \psi_x(\mathbf{x},t)/{\partial} t, \
\boldsymbol{\alpha}_y(\mathbf{y},t), \ {\partial} \psi_y(\mathbf{y},t)/{\partial} t
\]
are locally bounded with respect to $\mathbf{x}$, $\mathbf{y}$ uniformly in $t$,
then
\begin{equation}\label{eq:convergence_f_xy}
\begin{split}
\lim_{t\rightarrow\infty}f_x(\mathbf{x}(t),{\boldsymbol{\theta}}_x,t)-f_x(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}_x(t),t)&=0,
\\
\lim_{t\rightarrow\infty}f_y(\mathbf{y}(t),{\boldsymbol{\theta}}_y,t)-f_y(\mathbf{y}(t),\hat{{\boldsymbol{\theta}}}_y(t),t)&=0
\end{split}
\end{equation}
\end{theorem}

Let us briefly comment on the conditions and assumptions of Theorem \ref{theorem:interconnection}. Conditions 1), 2) specify restrictions on the goal functionals, similar to those of Theorem
\ref{stability_theorem}. Condition 3) is analogous to the requirement in P3) of Theorem \ref{stability_theorem}; condition 5) specifies uncertainties in the coupling functions $h_x(\cdot)$, $h_y(\cdot)$
in terms of their growth rates w.r.t. $\psi_x(\cdot)$,
$\psi_y(\cdot)$. We observe here that this property is needed in order to characterize the $L_2$ norms of functions
$h_x(\mathbf{x}(t),\mathbf{y}(t),t)$, $h_y(\mathbf{x}(t),\mathbf{y}(t),t)$ in terms of the
$L_2$ norms of functions $\psi_x(\mathbf{x}(t),t)$, $\psi_y(\mathbf{y}(t),t)$.
Therefore, it is possible to replace requirement
(\ref{eq:disturbance_gain}) with the following set of conditions:
\begin{equation}\label{eq:disturbance_gain_1}
\begin{split}
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}&\leq \beta_x
\|\psi_x(\mathbf{x}(t),t)\|_{2,[t_0,T]}+C_x, \\
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}&\leq \beta_y
\|\psi_y(\mathbf{y}(t),t)\|_{2,[t_0,T]}+C_y
\end{split}
\end{equation}
The replacement will allow us to extend results of Theorem
\ref{theorem:interconnection} to interconnections of systems where the coupling functions do not depend explicitly on
$\psi_x(\mathbf{x}(t),t)$, $\psi_y(\mathbf{y}(t),t)$. We illustrate this possibility later with an example.

Condition (\ref{eq:small_gain_adapt}) is the small-gain condition with respect to the $L_2^1[t_0,T]$ norms for interconnection
(\ref{eq:system:s11}), (\ref{eq:system:s21}) with control
(\ref{control_s1}), (\ref{control_s2}). In the case that mappings
$\gamma_{x_{2,2}}(\cdot)$, $\gamma_{y_{2,2}}(\cdot)$ in
(\ref{eq:target_dynamics_connected}) are majorized by linear functions
\[
\gamma_{x_{2,2}}(\Delta)\leq g_{x_{2,2}} \Delta, \
\gamma_{y_{2,2}}(\Delta)\leq g_{y_{2,2}} \Delta, \ \Delta\geq 0,
\]
condition (\ref{eq:small_gain_adapt}) reduces to the much simpler
\[
\beta_y \beta_x g_{x_{2,2}} g_{y_{2,2}}
\left(\frac{D_y}{D_{y,1}}+1\right)\left(\frac{D_x}{D_{x,1}}+1\right)<
1
\]
Notice also that the mappings $\gamma_{x_{2,2}}(\cdot)$,
$\gamma_{y_{2,2}}(\cdot)$ are defined by properties of the target dynamics (\ref{eq:target_dynamics_connected}), and, in principle,
these can be made arbitrarily small. This eventually leads to the following conclusion: the smaller the $L_2$-gains of the target dynamics of systems $\mathcal{S}_1$, $\mathcal{S}_2$, the wider the class of nonlinearities (bounds for $\beta_x$, $\beta_y$, domains of $D_x$, $D_{x,1}$, $D_y$, $D_{y,1}$) which admit a solution to Problem \ref{problem:decentralized}.

\paragraph{Example}

Let us illustrate application of Theorem
\ref{theorem:interconnection} to the problem of decentralized control of two coupled oscillators with nonlinear damping.
Consider the following interconnected systems:
\begin{equation}\label{eq:example_dec_model}
\left\{\begin{array}{ll}
\dot{x}_{1}&=x_{2}\\
\dot{x}_{2}&=f_x(x_{1},\theta_x)+k_1 y_{1} + u_x,
\end{array} \right. \ \
\left\{
\begin{array}{ll}
\dot{y}_{1}&=y_{2}\\
\dot{y}_{2}&=f_y(y_{1},\theta_y)+k_2 x_{1}+ u_y,
\end{array}\right.
\end{equation}
where $k_1$, $k_2\in\mathbb{R}$ are uncertain parameters of coupling,
functions $f_x(x_{1},\theta_x)$, $f_y(y_{1},\theta_y)$
stand for the nonlinear damping terms, and
$\theta_{x}$, $\theta_y$ are unknown parameters. For illustrative purpose we assume the following mathematical model for functions
$f_x(\cdot)$, $f_y(\cdot)$ in (\ref{eq:example_dec_model}):
\begin{equation}\label{eq:example_dec_uncertainty}
\begin{split}
f_x(x_{1},\theta_x)&= \theta_x (x_{1}-x_0)+0.5\sin
(\theta_x(x_{1}-x_0)),\\
\ f_y(y_{1},\theta_y)&= \theta_y (y_{1}-y_0)+0.6\sin
(\theta_y(y_{1}-y_0))
\end{split}
\end{equation}
where $x_0$, $y_0$ are known. Let the control goal be to steer states $\mathbf{x}$ and $\mathbf{y}$ to the origin. Consider the following goal functions
\begin{equation}\label{eq:example_psi}
\psi_x(\mathbf{x},t)=x_1+x_2, \ \psi_y(\mathbf{y},t)= y_1+y_2
\end{equation}
Taking into account equations (\ref{eq:example_dec_model}) and
(\ref{eq:example_psi}) we can derive that
\begin{equation}\label{eq:example_relative_dynamics}
\dot{x}_1=-x_1+\psi_x(\mathbf{x}(t),t), \ \dot{y}_1=-y_1+\psi_y(\mathbf{y},t)
\end{equation}
This automatically implies that
\[
\begin{split}
\|x_1(t)\|_{\infty,[t_0,T]}&\leq
\|x_1(t_0)\|+\|\psi_x(\mathbf{x}(t),t)\|_{\infty,[t_0,T]}\\
\|y_1(t)\|_{\infty,[t_0,T]}&\leq
\|y_1(t_0)\|+\|\psi_y(\mathbf{y}(t),t)\|_{\infty,[t_0,T]}
\end{split}
\]
Hence, Assumption \ref{assume:psi} is satisfied for chosen goal functions $\psi_x(\cdot)$ and $\psi_y(\cdot)$. Notice also that equalities (\ref{eq:example_relative_dynamics}) imply that
\begin{equation}\label{eq:example_L2_gains}
\begin{split}
\|x_1(t)\|_{2,[t_0,T]}&\leq 2^{-1/2}\|x_1(t_0)\|+
\|\psi_x(\mathbf{x},t)\|_{2,[t_0,T]}\\
\|y_1(t)\|_{2,[t_0,T]}&\leq 2^{-1/2}\|y_1(t_0)\|+
\|\psi_y(\mathbf{y},t)\|_{2,[t_0,T]}
\end{split}
\end{equation}
Moreover, according to (\ref{eq:example_relative_dynamics})
limiting relations
\begin{equation}\label{eq:example_control_goal_limit}
\begin{split}
&
\lim_{t\rightarrow\infty}\psi_x(\mathbf{x}(t),t)=\lim_{t\rightarrow\infty}x_1(t)+x_2(t)=0,\\
&
\lim_{t\rightarrow\infty}\psi_y(\mathbf{y}(t),t)=\lim_{t\rightarrow\infty}y_1(t)+y_2(t)=0
\end{split}
\end{equation}
guarantee that
\[
\lim_{t\rightarrow\infty} x_1(t)=0, \
\lim_{t\rightarrow\infty}x_2(t)=0, \ \lim_{t\rightarrow\infty}
y_1(t)=0, \ \lim_{t\rightarrow\infty}y_2(t)=0
\]
Hence, property (\ref{eq:example_control_goal_limit}) ensures asymptotic reaching of the control goal.

According to equations (\ref{control_s1}), (\ref{control_s2})
control functions
\begin{equation}\label{eq:example_control}
\begin{split}
u_x&=-\lambda_x\psi_x-x_2-f_x(x_1,\hat{\theta}_x)\\
u_y&=-\lambda_y\psi_y-y_2-f_y(y_1,\hat{\theta}_y), \ \lambda_x, \
\lambda_y>0
\end{split}
\end{equation}
transform system (\ref{eq:example_dec_model}) into the following form
\begin{equation}\label{eq:example_error_model}
\begin{split}
\dot{\psi}_x&=-\lambda_x \psi_x +
f_x(x_1,\theta_x)-f_x(x_1,\hat{\theta}_x)+k_1 y_1\\
\dot{\psi}_y&=-\lambda_y \psi_y +
f_y(y_1,\theta_y)-f_y(y_1,\hat{\theta}_y)+k_2 x_1
\end{split}
\end{equation}
Notice that systems
\[
\dot{\psi}_x=-\lambda_x \psi_x +\xi_x(t), \
\dot{\psi}_y=-\lambda_y \psi_y +\xi_y(t)
\]
satisfy Assumption \ref{assume:gain} with
\[
\gamma_{x_{2,2}}=\frac{1}{\lambda_x}\|\xi_x(t)\|_{2,[t_0,T]},
\
\gamma_{y_{2,2}}=\frac{1}{\lambda_y}\|\xi_y(t)\|_{2,[t_0,T]}
\]
respectively, and functions $f_x(\cdot)$, $f_y(\cdot)$ satisfy Assumptions \ref{assume:alpha}, \ref{assume:alpha_upper} with
\[
\begin{split}
&D_{x}=1.5, \ D_{x,1}=0.5, \ \alpha_x(\mathbf{x},t)= x_1-x_0, \\
&D_{y}=1.6, \ D_{y,1}=0.4, \ \alpha_y(\mathbf{y},t)= y_1-y_0
\end{split}
\]
Hence conditions 1)-4) of Theorem \ref{theorem:interconnection}
are satisfied. Furthermore, according to the remarks regarding condition 5) of the theorem, requirements
(\ref{eq:disturbance_gain}) can be replaced with implicit constraints (\ref{eq:disturbance_gain_1}). These, however,
according to (\ref{eq:example_L2_gains}) also hold with
$\beta_x=k_1$, $\beta_y=k_2$.

Given that $\alpha_x(\mathbf{x},t)=x_1-x_0$, $\alpha_y(\mathbf{y},t)=y_1-y_0$,
Assumption \ref{assume:explicit_realizability} will be satisfied for functions $\alpha_x(\mathbf{x},t)$, $\alpha_y(\mathbf{y},t)$ with
$\Psi_x(\cdot)=0$, $\Psi_y(\cdot)=0$. Therefore, adaptation algorithms (\ref{fin_forms_ours_tr1x}),
(\ref{fin_forms_ours_tr1y}) will have the following form:
\begin{eqnarray}\label{eq:example_adaptation}
\hat{\theta}_x&=& \Gamma_x((x_1+x_2) (x_1-x_0) +
\hat{\theta}_{x,I}),\nonumber \\
\dot{\hat\theta}_{x,I}&=& \lambda_x (x_1+x_2)(x_1-x_0) - (x_1+x_2)x_2\nonumber \\
\hat{\theta}_y&=& \Gamma_y((y_1+y_2) (y_1-y_0) +
\hat{\theta}_{y,I}),\\
\dot{\hat\theta}_{y,I}&=& \lambda_y (y_1+y_2)(y_1-y_0) -
(y_1+y_2)y_2\nonumber
\end{eqnarray}
Hence, according to Theorem \ref{theorem:interconnection}
boundedness of the solutions in the closed loop system
(\ref{eq:example_error_model}), (\ref{eq:example_adaptation}) is ensured upon the following condition
\begin{equation}\label{eq:example_condition_boundedness}
\frac{k_1 k_2}{\lambda_x
\lambda_y}\left(1+\frac{D_x}{D_{x,1}}\right)\left(1+\frac{D_y}{D_{y,1}}\right)<1
\Rightarrow k_1 k_2 < \frac{\lambda_x\lambda_y}{20}
\end{equation}
Moreover, given that properties H\ref{hyp:locally_bound_uniform_f}--
H\ref{hyp:locally_bound_uniform_phi} hold for the chosen functions
$\psi_x(\mathbf{x},t)$, $\psi_y(\mathbf{y},t)$, condition
(\ref{eq:example_condition_boundedness}) guarantees that limiting relations (\ref{eq:convergence_psi_xy}),
(\ref{eq:convergence_f_xy}) hold.

Trajectories of the closed loop system
(\ref{eq:example_dec_model}), (\ref{eq:example_control}),
(\ref{eq:example_adaptation}) with the following values of parameters $\Gamma_x=\Gamma_y=1$, $\lambda_x=\lambda_y=2$,
$x_0=y_0=1$, $\theta_x=\theta_y=1$ and initial conditions
$x_1(0)=-1$, $x_2(0)=0$, $y_1(0)=1$, $y_2(0)=0$,
$\hat{\theta}_{x,I}(0)=-1$, $\hat{\theta}_{y,I}(0)=-2$ are provided in Fig. \ref{fig:decentralized:example}.

\begin{figure}
\begin{center}
\includegraphics[width=300pt]{example_decentralized.eps}
\end{center}
\begin{center}
\caption{Plots of trajectories $x_1(t)$ (panel a), $x_2(t)$ (panel b), $y_1(t)$ (panel c), $y_2(t)$ (panel d) as functions of $t$ in closed loop system (\ref{eq:example_dec_model}),
(\ref{eq:example_control}), (\ref{eq:example_adaptation}). Dotted lines correspond to the case when $k_1=k_2=0.4$, and solid lines stand for solutions obtained with the following values of coupling
$k_1=1$, $k_2=0.1$}\label{fig:decentralized:example}
\end{center}
\end{figure}

\section{Conclusion}

We provided new tools for the design and analysis of adaptive decentralized control schemes. Our method allows the desired dynamics to be Lyapunov unstable and the parametrization of the uncertainties to be nonlinear. The results are based on a formulation of the problem for adaptive control as a problem of regulation in functional spaces (in particular, $L_2^1[t_0,T]$
spaces) rather than simply of reaching the control goal in
$\mathbb{R}^n$. This allows us to introduce adaptation algorithms with new properties and apply a small-gain argument to establish applicability of these schemes to the problem of decentralized control.

In order to avoid unnecessary complications, state feedback was assumed in the main-loop controllers which transform original equation into the error coupled model. Extension of the results to output-feedback main loop controllers is a topic for future study.

\section{Proofs of the theorems}

\subsection{Proof of Theorem \ref{stability_theorem}}

Let us first show that property P1) holds. Consider solutions of system (\ref{system1}), (\ref{error_model_d}),
(\ref{fin_forms_ours_tr1}), (\ref{fin_forms_ours_tr11}) passing through the point $\mathbf{x}(t_0)$, $\hat{{\boldsymbol{\theta}}}_I(t_0)$ for
$t\in[t_0,T^\ast]$. Let us calculate the time-derivative of function
$\hat{{\boldsymbol{\theta}}}(\mathbf{x},t)$:
$\dot{\hat{{\boldsymbol{\theta}}}}(\mathbf{x},t)=\Gamma({\dot{\hat{{\boldsymbol{\theta}}}}_{P}}+\dot{\hat{\boldsymbol{\theta}}}_I)=\Gamma({\dot\psi}\boldsymbol{\alpha}(\mathbf{x},t)+\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{\boldsymbol{\theta}}}_I)$.
Notice that
\begin{equation}\label{t2_1}
\begin{split}
&\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{{\boldsymbol{\theta}}}}_I=\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_1}\dot{\mathbf{x}}_1+\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_2}\dot{\mathbf{x}}_2 +\\
& \psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} t}-
\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_1}\dot{\mathbf{x}}_1-\frac{{\partial}
\Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}\dot{\mathbf{x}}_2-\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} t}+\dot{\hat{\boldsymbol{\theta}}}_I
\end{split}
\end{equation}
According to Assumption \ref{assume:explicit_realizability},
$\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} \mathbf{x}_2}=\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_2}
$. Then taking into account (\ref{t2_1}), we obtain
\begin{equation}\label{t2_2}
\begin{split}
&
\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{{\boldsymbol{\theta}}}}_I=\left(\psi(\mathbf{x},t)\frac{{\partial}
\boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} \mathbf{x}_1}-\frac{{\partial} \Psi}{{\partial} \mathbf{x}_1
}\right)\dot{\mathbf{x}}_1\\
&+\psi(\mathbf{x},t)\frac{{\partial} \boldsymbol{\alpha}(\mathbf{x},t)}{{\partial} t}-\frac{{\partial} \Psi(\mathbf{x},t)}{{\partial} t}+\dot{\hat{\boldsymbol{\theta}}}_I
\end{split}
\end{equation}
Notice that according to the proposed notation we can rewrite the term $\left(\psi(\mathbf{x},t)\frac{{\partial} \boldsymbol{\alpha}(\mathbf{x},t)}{{\partial}
\mathbf{x}_1}-\frac{{\partial} \Psi}{{\partial} \mathbf{x}_1 }\right)\dot{\mathbf{x}}_1$ in the following form: $\psi(\mathbf{x},t)L_{\mathbf{f}_1}
\boldsymbol{\alpha}(\mathbf{x},t)-L_{\mathbf{f}_1} \Psi(\mathbf{x},t)+
\left(\psi(\mathbf{x},t)L_{\mathbf{g}_1} \boldsymbol{\alpha}(\mathbf{x},t)-L_{\mathbf{g}_1}
\Psi(\mathbf{x},t)\right)u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)$. Hence, it follows from (\ref{fin_forms_ours_tr1}) and (\ref{t2_2}) that
$\psi\dot{\boldsymbol{\alpha}}(\mathbf{x},t)-\dot{\Psi}(\mathbf{x},t)+\dot{\hat{{\boldsymbol{\theta}}}}_I=\varphi(\psi)\boldsymbol{\alpha}(\mathbf{x},t)
$. Therefore, the derivative $\dot{\hat{\boldsymbol{\theta}}}(\mathbf{x},t)$ can be written in the following way:
\begin{equation}\label{algorithm_dpsi}
\dot{\hat{{\boldsymbol{\theta}}}}=\Gamma({\dot\psi}+\varphi(\psi))\boldsymbol{\alpha}(\mathbf{x},t)
\end{equation}
Asymptotic properties of nonlinear parameterized control systems with adaptation algorithm (\ref{algorithm_dpsi}) under assumption of Lyapunov stability of the target dynamics were investigated in
\cite{tpt2003_tac}. In the present contribution we aim to provide characterizations of the closed loop system in terms of functional mappings between functions $\psi(\mathbf{x}(t),t)$, $\varepsilon(t)$,
and $f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$ and without requiring Lyapunov stability of the target dynamics
(\ref{eq:target_dynamics}).

For this purpose consider the following positive-definite function:
\begin{equation}\label{V_theta}
V_{\hat{{\boldsymbol{\theta}}}}(\hat{{\boldsymbol{\theta}}},{\boldsymbol{\theta}},t)=
\frac{1}{2}\|\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}}\|^2_{\Gamma^{-1}} +
\frac{D}{4 D_1^2} \int_{t}^\infty\varepsilon^2(\tau)d\tau
\end{equation}
Its time-derivative according to equations (\ref{algorithm_dpsi})
can be obtained as follows:
\begin{equation}\label{eq:dV_full_alg}
\dot{V}_{\hat{{\boldsymbol{\theta}}}}(\hat{{\boldsymbol{\theta}}},{\boldsymbol{\theta}},t)=(\varphi(\psi)+{\dot\psi})(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})^{T}\boldsymbol{\alpha}(\mathbf{x},t)
-
\frac{D}{4 D_1^2}\varepsilon^2(t)
\end{equation}
Hence using Assumptions \ref{assume:alpha},
\ref{assume:alpha_upper} and equality (\ref{error_model_d}) we can estimate the derivative $\dot{V}_{\hat{{\boldsymbol{\theta}}}}$ as follows:
\begin{eqnarray}\label{parameric_deviation_derivative}
& &
\dot{V}_{\hat{{\boldsymbol{\theta}}}}(\hat{{\boldsymbol{\theta}}},{\boldsymbol{\theta}},t)\leq-(f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)+\varepsilon(t))(\hat{{\boldsymbol{\theta}}}-{\boldsymbol{\theta}})^{T}\boldsymbol{\alpha}(\mathbf{x},t)
- \frac{D}{4 D_1^2}\varepsilon^2(t)\nonumber
\\
& &
\leq-\frac{1}{D}(f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t))^2+\frac{1}{D_1}|\varepsilon(t)||f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|\nonumber\\
& & - \frac{D}{4 D_1^2}\varepsilon^2(t) \leq -
\frac{1}{D}\left(|f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-f(\mathbf{x},{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\right)^2 \leq 0
\end{eqnarray}
It follows immediately from
(\ref{parameric_deviation_derivative}), (\ref{V_theta}) that
\begin{equation}\label{eq:parametric_norm}
\|\hat{{\boldsymbol{\theta}}}(t)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}\leq
\|\hat{{\boldsymbol{\theta}}}(t_0)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}+\frac{D}{2 D_1^2}\|\varepsilon(t)\|^{2}_{2,[t_0,\infty]}
\end{equation}
In particular, for $t\in[t_0,T^\ast]$ we can derive from
(\ref{V_theta}) that
$\|\hat{{\boldsymbol{\theta}}}(t)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}\leq
\|\hat{{\boldsymbol{\theta}}}(t_0)-{\boldsymbol{\theta}}\|^{2}_{\Gamma^{-1}}+\frac{D}{2 D_1^2}\|\varepsilon(t)\|^{2}_{2,[t_0,T^\ast]}$. Therefore
$\hat{{\boldsymbol{\theta}}}(t)\in L_\infty^d[t_0,T^\ast]$. Furthermore
$|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\in L_{2}^1 [t_0,T^\ast]$. In particular
\begin{eqnarray}\label{eq:t1_ins1}
&
&\left\||f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\right\|_{2,[t_0,T^\ast]}^2\leq
\nonumber\\
&&\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}+\frac{D^2}{4 D_1^2}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}^2
\end{eqnarray}
Hence $f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\in L_{2}^1 [t_0,T^\ast]$ as a sum of two functions from $L_{2}^1
[t_0,T^\ast]$. In order to estimate the upper bound of the norm
$\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,T^\ast]}$
from (\ref{eq:t1_ins1}) we use the Minkowski inequality:
\begin{eqnarray}
&&\left\||f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)|-\frac{D}{2 D_1} \varepsilon(t)\right\|_{2,[t_0,T^\ast]}\leq
\nonumber\\
&&\left(\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}\right)^{0.5}+
\frac{D}{2 D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}\nonumber
\end{eqnarray}
and then apply the triangle inequality to the functions from
$L_{2}^1 [t_0,T^\ast]$:
\begin{eqnarray}\label{eq:t1_ins2}
& &
\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,T^\ast]}\leq\nonumber\\
& &
\left\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-\frac{D}{2 D_1}\varepsilon(t)\right\|_{2,[t_0,T^\ast]}+\\
& & \frac{D}{2 D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}\leq
\left(\frac{D}{2}\|{\boldsymbol{\theta}}-\hat{{\boldsymbol{\theta}}}(t_0)\|^{2}_{\Gamma^{-1}}\right)^{0.5}
+ \frac{D}{D_1}\|\varepsilon(t)\|_{2,[t_0,T^\ast]}\nonumber
\end{eqnarray}
Therefore, property P1) is proven.

Let us prove property P2). In order to do this we have to check first if the solutions of the closed loop system are defined for all $t\in\mathbb{R}_+$, i.e. they do not go to infinity in finite time.
We prove this by a contradiction argument. Indeed, suppose there exists a time instant $t_s$ such that $\|\mathbf{x}(t_s)\|=\infty$. It follows from P1), however, that
$f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\in L_{2}^1
[t_0,t_s]$. Furthermore, according to (\ref{eq:t1_ins2}) the norm
$\|f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)-f(\mathbf{x}(t),{\boldsymbol{\theta}},t)\|_{2,[t_0,t_s]}$
can be bounded from above by a continuous function of ${\boldsymbol{\theta}},
\ \hat{{\boldsymbol{\theta}}}(t_0)$, $\Gamma$, and
$\|\varepsilon(t)\|_{2,[t_0,\infty]}$. Let us denote this bound by symbol $D_f$. Notice that $D_f$ does not depend on $t_s$. Consider system (\ref{error_model_d}) for $t\in[t_0,t_s]$:
${\dot\psi}=f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)-\varphi(\psi,{\boldsymbol{\omega}},t)+\varepsilon(t)$.
Given that both
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t),
\varepsilon(t) \in L_{2}^1 [t_0,t_s]$ and taking into account Assumption \ref{assume:gain}, we automatically obtain that
$\psi(\mathbf{x}(t),t)\in L_\infty^{1}[t_0,t_s]$. In particular, using the triangle inequality and the fact that the function
$\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},M\right)$ in Assumption \ref{assume:gain} is non-decreasing in $M$, we can estimate the norm $\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,t_s]}$ as follows:
\begin{equation}\label{eq:bound_psi}
\|\psi(\mathbf{x}(t),t)\|_{\infty,[t_0,t_s]}\leq
\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},D_f+\|\varepsilon(t)\|^2_{2,[t_0,\infty]}\right)
\end{equation}
According to Assumption \ref{assume:psi} the following inequality holds:
\begin{equation}\label{eq:bound_x}
\|\mathbf{x}(t)\|_{\infty,[t_0,t_s]}\leq\tilde{\gamma}\left(\mathbf{x}_0,{\boldsymbol{\theta}},\gamma_{\infty,2}\left(\psi(\mathbf{x}_0,t_0),{\boldsymbol{\omega}},D_f+\|\varepsilon(t)\|^2_{2,[t_0,\infty]}\right)\right)
\end{equation}
Given that a superposition of locally bounded functions is locally bounded, we conclude that $\|\mathbf{x}(t)\|_{\infty,[t_0,t_s]}$ is bounded. This, however, contradicts the previous claim that
$\|\mathbf{x}(t_s)\|=\infty$. Taking into account inequality
(\ref{eq:parametric_norm}) we can derive that both
$\hat{{\boldsymbol{\theta}}}(\mathbf{x}(t),t)$ and $\hat{{\boldsymbol{\theta}}}_I(t)$ are bounded for every $t\in\mathbb{R}_+$. Moreover, according to
(\ref{eq:bound_psi}), (\ref{eq:bound_x}),
(\ref{eq:parametric_norm}) these bounds are themselves locally bounded functions of initial conditions and parameters. Therefore,
$\mathbf{x}(t)\in L^n_\infty[t_0,\infty]$,
$\hat{{\boldsymbol{\theta}}}(\mathbf{x}(t),t)\in L^d_\infty [t_0,\infty]$.
Inequality (\ref{eq:psi_gain}) follows immediately from
(\ref{eq:t1_ins2}), (\ref{eq:gain_psi_L2}), and the triangle inequality. Property P2) is proven.

Let us show that P3) holds. It is assumed that system
(\ref{eq:target_dynamics}) has $L_{2}^1 [t_0,\infty]\mapsto L_{p}^1 [t_0,\infty]$, $p>1$ gain. In addition, we have just shown that $f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t),
\varepsilon(t) \in L_{2}^1 [t_0,\infty]$. Hence, taking into account equation (\ref{error_model_d}) we conclude that
$\psi(\mathbf{x}(t),t)\in L_{p}^1 [t_0,\infty]$, $p>1$. On the other hand, given that $f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)$,
$\varphi(\psi,{\boldsymbol{\omega}},t)$ are locally bounded with respect to their first two arguments uniformly in $t$, and that $\mathbf{x}(t)\in L_{\infty}^n[t_0,\infty]$, $\psi(\mathbf{x}(t),t)\in L_\infty^1[t_0,\infty]$, $\hat{{\boldsymbol{\theta}}}(t)\in L_\infty^d[t_0,\infty]$, ${\boldsymbol{\theta}}\in\Omega_\theta$, the signal
$\varphi(\psi(\mathbf{x}(t),t),{\boldsymbol{\omega}},t)+f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)$
is bounded. Then $\varepsilon(t)\in L_\infty^1[t_0,\infty]$
implies that ${\dot\psi}$ is bounded, and P3) is guaranteed by Barbalat's lemma.

To complete the proof of the theorem (property P4) consider the time-derivative of function $f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)$:
\[
\begin{split}
&\frac{d}{dt}f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)=L_{\mathbf{f}(\mathbf{x},{\boldsymbol{\theta}})+\mathbf{g}(\mathbf{x})u(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)}f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)+\\
& \frac{{\partial} f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)}{{\partial} \hat{{\boldsymbol{\theta}}}}\Gamma
(\varphi(\psi,{\boldsymbol{\omega}},t)+{\dot\psi})\boldsymbol{\alpha}(\mathbf{x},t)+\frac{{\partial} f(\mathbf{x},\hat{{\boldsymbol{\theta}}},t)}{{\partial} t}
\end{split}
\]
Taking into account that the function $f(\mathbf{x},{\boldsymbol{\theta}},t)$ is continuously differentiable in $\mathbf{x}$, ${\boldsymbol{\theta}}$; the derivative
$ {\partial} {f(\mathbf{x},{\boldsymbol{\theta}},t)}/{{\partial} t}$ is locally bounded with respect to $\mathbf{x}$, ${\boldsymbol{\theta}}$ uniformly in $t$; functions
$\boldsymbol{\alpha}(\mathbf{x},t)$, ${\partial} \psi(\mathbf{x},t)/{\partial} t$ are locally bounded with respect to $\mathbf{x}$ uniformly in $t$, then $d/dt
(f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{\boldsymbol{\theta}},t))$ is bounded. Then given that
$f(\mathbf{x}(t),{\boldsymbol{\theta}},t)-f(\mathbf{x}(t),\hat{{\boldsymbol{\theta}}}(t),t)\in L_{2}^1
[t_0,\infty]$ by applying Barbalat's lemma we conclude that
$f(\mathbf{x},{\boldsymbol{\theta}},t)-f(\mathbf{x},\hat{\boldsymbol{\theta}},t)\rightarrow 0$
as $t\rightarrow\infty$. { The theorem is proven.}

\subsection{Proof of Theorem \ref{theorem:interconnection}}

Let us denote
\[
\Delta f_x[t_0,T]=
\|f_x(\mathbf{x},{\boldsymbol{\theta}}_x,t)-f_x(\mathbf{x},\hat{{\boldsymbol{\theta}}}_x,t)\|_{2,[t_0,T]},
\]
\[
\Delta f_y
[t_0,T]=\|f_y(\mathbf{y},{\boldsymbol{\theta}}_y,t)-f_y(\mathbf{y},\hat{{\boldsymbol{\theta}}}_y,t)\|_{2,[t_0,T]}.
\]
As follows from Theorem \ref{stability_theorem} the following inequalities hold
\begin{equation}\label{proof:interconnection:t1}
\Delta f_x[t_0,T]\leq C_x + \frac{D_x}{D_{1,x}}
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}
\end{equation}
\begin{equation}\label{proof:interconnection:t2}
\Delta f_y[t_0,T]\leq C_y + \frac{D_y}{D_{1,y}}
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]},
\end{equation}
where $C_x$, $C_y$ are some constants, independent of $T$. Taking estimates (\ref{proof:interconnection:t1}),
(\ref{proof:interconnection:t2}) into account we obtain the following estimates:
\begin{equation}\label{proof:interconnection:t3}
\begin{split}
&\Delta f_x[t_0,T]+\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq \\
&C_x + \left(\frac{D_x}{D_{1,x}}+1\right)
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}
\end{split}
\end{equation}
\begin{equation}\label{proof:interconnection:t4}
\begin{split}
&\Delta f_y[t_0,T]+\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq
\\
&C_y + \left(\frac{D_y}{D_{1,y}}+1\right)
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]},
\end{split}
\end{equation}
The proof of the theorem would be complete if we show that the
$L_2^1[t_0,T]$ norms of $h_x(\mathbf{x}(t),\mathbf{y}(t),t)$,
$h_y(\mathbf{x}(t),\mathbf{y}(t),t)$ are globally bounded uniformly in $T$.
Let us show that this is indeed the case. Using the widely known generalized triangular inequality \cite{Jiang_1994}
\[
\gamma(a + b)\leq \gamma((\rho+Id)(a))+\gamma((\rho+Id)\circ
\rho^{-1}(b)), \ a,b\in\mathbb{R}_+, \ \gamma,\rho\in\mathcal{K}_\infty,
\]
equations (\ref{proof:interconnection:t3}),
(\ref{proof:interconnection:t4}) and also property
(\ref{eq:disturbance_gain}), we conclude that
\begin{equation}\label{proof:interconnection:t5}
\begin{split}
&\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq\\
&\beta_y\cdot \gamma_{y_{2,2}}\circ\rho_1
\left(\left(\frac{D_y}{D_{1,y}}+1\right)\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\right)+C_{y,1}\\
&\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\leq\\
& \beta_x\cdot \gamma_{x_{2,2}}\circ\rho_2
\left(\left(\frac{D_x}{D_{1,x}}+1\right)\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}\right)+C_{x,1}
\end{split}
\end{equation}
where $\rho_1(\cdot)$, $\rho_2(\cdot)\in\mathcal{K}_\infty$,
$\rho_1(\cdot), \rho_2(\cdot)>Id(\cdot)$. Then, according to
(\ref{proof:interconnection:t5}), the existence of
$\rho_3(\cdot)\in\mathcal{K}_\infty$, $\rho_3(\cdot)\geq Id(\cdot)$, satisfying the inequality
\[
\beta_y\circ\gamma_{y_{2,2}}\circ\rho_1\circ\left(\frac{D_y}{D_{y,1}}+1\right)\circ\rho_3\circ
\beta_x\circ
\gamma_{x_{2,2}}\circ\rho_2\circ\left(\frac{D_x}{D_{x,1}}+1\right)(\Delta)<
\Delta \ \forall \ \Delta\geq \bar{\Delta}
\]
for some $\bar{\Delta}\in\mathbb{R}_+$ ensures that the norms
\[
\|h_y(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}, \
\|h_x(\mathbf{x}(t),\mathbf{y}(t),t)\|_{2,[t_0,T]}
\]
are globally uniformly bounded in $T$. The rest of the proof follows from Theorem \ref{stability_theorem}. { The theorem is proven.}
\title{Grounded Relational Inference: Domain Knowledge Driven Explainable Autonomous Driving}

\begin{abstract}
Explainability is essential for autonomous vehicles and other robotics systems interacting with humans and other objects during operation. Humans need to understand and anticipate the actions taken by the machines for trustful and safe cooperation. In this work, we aim to develop an explainable model that generates explanations consistent with both human domain knowledge and the model's inherent causal relation. In particular, we focus on an essential building block of autonomous driving\textemdash multi-agent interaction modeling. We propose Grounded Relational Inference (GRI). It models an interactive system's underlying dynamics by inferring an interaction graph representing the agents' relations. We ensure a semantically meaningful interaction graph by grounding the relational latent space into semantic interactive behaviors defined with expert domain knowledge. We demonstrate that it can model interactive traffic scenarios under both simulation and real-world settings, and generate semantic graphs explaining the vehicle's behavior by their interactions.

\end{abstract}

\section{Introduction}
\label{sec:introduction}
\IEEEPARstart{D}{eep} learning has been utilized to address various autonomous driving problems \cite{bojarski2016end,chen2017multi,chen2019dob}. However, deep neural networks lack the transparency that helps people understand their underlying mechanism. It is a crucial drawback for safety-critical applications with humans involved (e.g., autonomous vehicles). Humans need to understand and anticipate the actions taken by the machines for trustful and safe cooperation. In response to this problem, the concept of explainable AI (XAI) was introduced. It refers to machine learning techniques that provide details and reasons that make a model's mechanism easy to understand \cite{arrieta2020explainable}. Most of the existing works for deep learning models focus on post-hoc explanations \cite{arrieta2020explainable}. They enhance model explainability by unraveling the underlying mechanisms of a trained model: Vision-based approaches, such as visual attention \cite{kim2017interpretable} and deconvolution \cite{bojarski2018visualbackprop}, illustrate which segments of the input image affect the outputs; Interaction-aware models, such as social LSTM with social attention \cite{alahi2016social,vemula2018social} and graph neural networks (GNN) with graph attention \cite{hoshen2017vain, velivckovic2017graph, sukhbaatar2016learning, kipf2018neural}, identify the agents that are critical to the decision-making procedure.

Although promising, post-hoc explanations could be ambiguous and falsely interpreted by humans. \textcolor{black}{For instance, a visual attention map only illustrates which regions of the input image the output of the model depends on. The semantic meaning behind the causal relation is left for human users to interpret. Kim et al. \cite{kim2018textual} attempted to resolve the ambiguity by aligning textual explanations with visual attention. However, the underlying mechanism of the model is not necessarily consistent with the textual explanations. To truly build trust with humans, we argue that a deep learning model for an autonomous system should be equipped with explanations consistent with both \emph{human domain knowledge} and the model's \emph{inherent causal relation}}.

\textcolor{black}{In this work, we explore how to approach such an explainable model for an essential building block of autonomous driving\textemdash multi-agent interaction modeling. In particular, we focus on the relational inference problem studied in \cite{kipf2018neural}. Kipf et al. propose the Neural Relational Inference (NRI) model, which models an interactive system by explicitly inferring its inherent interactions. Formally, the NRI model aims to solve a reconstruction task. Given the observed trajectories of all the objects, an encoder first infers the interactions between objects represented by a latent interaction graph, whose edges are aligned with discrete latent variables corresponding to a cluster of pairwise interaction behaviors between the objects. Afterward, a decoder that learns the dynamical model conditioned on the inferred interaction graph then reconstructs the trajectories given the initial states. If the decoder can accurately reconstruct the trajectories, it indicates that the latent space effectively models the interactions.}

\textcolor{black}{We find this discrete latent space interesting because the inferred interaction graph could potentially serve as an explanation directly: it explains the reconstructed trajectories as a sequence of interaction behaviors among agents. Moreover, the reconstructed trajectories are governed by the same interaction graph. Therefore, the NRI model seems promising to fulfill our goal to make the explanation consistent with the model's underlying mechanism. However, since the NRI model learns the latent space in an unsupervised manner, it is difficult for humans to interpret the semantic meaning behind those interaction behaviors, which makes the interaction graph ambiguous as an explanation. To address this issue, we propose to ground the latent space in a set of interactive behaviors defined with human domain knowledge.}

\begin{figure*}[t]
\centering
\includegraphics[width=6.4in]{example.pdf}
\caption{A motivating lane-changing scenario where we ask different models to control the red vehicle. All the models generate deceleration commands but have different intermediate outputs. With the aid of visual attention, we generate a heat map indicating the critical pixels of the input image. Graph attention network assigns edge weights $\omega_i$ to specify the importance of surrounding vehicles to the controlled vehicle. However, the attention mechanisms cannot recognize different effects\textemdash the two cars are mutually important but affect each other in distinct ways. The NRI model can distinguish between different interactive behaviors by assigning different values to the latent variables $z_i$ in the interaction graph. Still, the latent space does not have explicit semantic meaning. In contrast, our model ensures a semantic interaction graph, which illustrates the model's understanding of the scenario and explains the action it takes. It determines the interaction graph with a latent space grounded in yielding and cutting-in behaviors. It learns control policies that generate behaviors consistent with their definitions in domain knowledge (e.g., traffic rules) and executes the corresponding policies according to the inferred edge types.}
\label{fig:example}
\end{figure*}

As a running example, consider the scenario depicted in Fig. \ref{fig:example}, where we ask different models to control the red vehicle. Attention mechanisms can indicate the critical pixels or agents, but they cannot recognize different effects\textemdash the two cars are mutually important but affect each other in distinct ways. The NRI model can distinguish between different interactive behaviors. Still, the latent space does not have explicit semantic meaning. In contrast, our model should determine the interaction graph with a latent space grounded in yielding and cutting-in behaviors. It learns control policies that generate behaviors consistent with their definitions in domain knowledge (e.g., traffic rules) and executes the corresponding policies according to the inferred edge types. This semantic interaction graph illustrates the model's understanding of the scenario and explains the action it takes.

\textcolor{black}{If we merely want to make the interaction graph consistent with humans' labeling of the scenes, a straightforward approach is training the encoder directly via supervised learning.} Interaction labels can be either obtained from human experts \cite{sun2018probabilistic} or rule-based labeling functions \cite{lee2019joint}. \textcolor{black}{However, labels for the interaction graph are insufficient to induce the decoder to synthesize the interactive behaviors suggested by the labels, because the model cannot capture the semantic meaning behind those interaction labels.} Instead, we recast relational inference into an inverse reinforcement learning (IRL) problem and introduce structured reward functions to ground the latent space. Concretely, we model the system as a multi-agent Markov decision process (MDP), where the agents share a reward function that depends on the relational latent space. We design structured reward functions based on expert domain knowledge to explicitly define the interactive behaviors corresponding to the latent space. To solve the formulated IRL problem, we propose Grounded Relational Inference (GRI). It has a variational-autoencoder-like (VAE) GNN in NRI \cite{kipf2018neural} as the backbone model. Additionally, we incorporate the structured reward functions into the model as an additional reward decoder. A variational extension of the adversarial inverse reinforcement learning (AIRL) algorithm is derived to train all the modules simultaneously.

\textcolor{black}{Compared to direct supervision via interaction labels, we provide implicit supervision to GRI in terms of the structures of the reward functions. Since each reward function defines a type of interactive behavior, we confine the latent space to a cluster of interactive behaviors. It mainly has two advantages over supervision through labeling: 1) First, since the policy decoder learns to maximize the cumulative reward given the inferred interaction graph, the structured reward functions guide the policy to synthesize the corresponding semantic behaviors, rather than simply mimicking the demonstrated trajectories; 2) Second, the end-to-end training scheme leaves the model to identify the underlying interaction graph of the observed trajectories and learn the characteristics of different behaviors (i.e., parameters of reward functions) from data. It avoids the undesired bias introduced during the labeling procedure. Labels generated by human experts are subjective. Different people may interpret an interacting scenario in different ways. In contrast, there exist systematic and principled ways to investigate what reward functions human behavior is subject to from data \cite{naumann2020analyzing}.}

\textcolor{black}{The remaining content is organized as follows. In Section \ref{sec:related-work}, we give a concise review on existing works that are closely related to ours in terms of methodology or motivation. In Section \ref{sec:background}, we briefly summarize NRI and AIRL to prepare the readers for the core technical content. In Section \ref{sec:formulation}, we introduce how we reformulate relational inference into a multi-agent IRL problem with relational latent space. In Section \ref{sec:method}, we present the GRI model in a general context. In Section \ref{sec:experiments}, we demonstrate how we apply the proposed framework to model some simple traffic scenarios in both simulation and real-world settings. The experimental results show that GRI can model interactive traffic scenarios, and generate semantic interaction graphs that are consistent with both human domain knowledge and the modeled interactive behaviors.}

\section{Related Work}
\label{sec:related-work}
Our model combines graph neural networks and adversarial inverse reinforcement learning for interactive system modeling. This section gives a concise review on these two topics and summarizes the existing works closely related to ours. We also discuss some additional works on explainable driving models as a complement to the discussion in Sec. \ref{sec:introduction}.

{\bf Interaction modeling using GNN.} GNN has been widely applied for interactive system modeling in recent years \cite{sukhbaatar2016learning, van2018relational, battaglia2016interaction}. One category of models we find interesting is those with graph attention mechanism. One seminal work is Graph Attention Network (GAT) \cite{velivckovic2017graph} which performed well on large-scale inductive classification problems. VAIN \cite{hoshen2017vain} applied attention in multi-agent modeling. The attention map unravels the interior interaction structure to some extent which improves the explainability of VAIN. An approach closely related to ours is NRI \cite{kipf2018neural}, which modeled the interaction structure explicitly with discrete relational latent space compared to the continuous graph attention. We explain the difference between NRI and our proposed method in Sec. \ref{sec:introduction} and \ref{sec:method}. A related work in the autonomous driving domain is \cite{lee2019joint}, which also modeled interactive driving behavior with semantically meaningful interactions but in a supervised manner.

\textcolor{black}{Another type of model we want to mention is the spatio-temporal graph (st-graph). St-graph decomposes a complex problem into components and their spatio-temporal interactions, which are represented by nodes and edges of a factor graph. It makes st-graph a ubiquitous representation for interacting systems, e.g., human motion \cite{jain2016structural}, human-robot interaction \cite{liu2021decentralized}, and traffic flow \cite{yu2017spatio}. Jain et al. \cite{jain2016structural} proposed a general method to transform any st-graph to a mixture of RNNs called structural-RNN (S-RNN). When using GRUs, our GNN policy decoder is similar to S-RNN, as they capture the same spatio-temporal dependency. In particular, Liu et al. \cite{liu2021decentralized} combined S-RNN with model-free RL to obtain a structured policy for robot crowd navigation. In terms of the underlying MDP, our GRI model is developed based on a multi-agent MDP, whereas theirs has a single robot as the agent and regards the surrounding humans as parts of the environment. In addition, we adopt a structured reward function for each agent based on the graph, and introduce a relational latent space into the MDP. }

\textcolor{black}{{\bf Adversarial IRL and Imitation Learning.} Now we give a brief review of related works on adversarial IRL. We also include prior works related to generative adversarial imitation learning (GAIL) \cite{ho2016generative}, because GAIL is closely connected to AIRL \cite{finn2016connection}. Both methods have GANs as the backbone models, and learn the discriminator through MaxEntIRL. The difference is that GAIL uses an unstructured discriminator and does not use the generator's density.}

Our work is mainly related to two categories of methods: multi-agent and latent AIRL/GAIL algorithms. Yu et al. \cite{yu2019multi} proposed a multi-agent AIRL framework for Markov games under correlated equilibrium. It is capable of modeling general heterogeneous multi-agent interactions. The PS-GAIL algorithm \cite{bhattacharyya2018multi} considered a multi-agent environment in the driving domain that is similar to ours\textemdash homogeneous agents with shared policy under centralized control\textemdash and extended GAIL \cite{ho2016generative} to model the interactive behaviors. In \cite{bhattacharyya2019simulating}, they augmented the reward in PS-GAIL as a principled manner to specify prior knowledge, which shares the same spirit with the structured reward functions in GRI.

Latent AIRL models integrate a VAE into either the discriminator or the generator for different purposes. Wang et al. \cite{wang2017robust} conditioned the discriminator on the embeddings generated by a VAE trained separately using behavior cloning. The VAE encodes trajectories into low-dimensional space, enabling the generator to produce diverse behaviors from limited demonstrations. VDB \cite{peng2018variational} constrained information contained in the discriminator's internal representation to balance the training procedure for adversarial learning algorithms. The PEMIRL framework \cite{yu2019meta} achieved meta-IRL by encoding demonstrations into a contextual latent space. Though studied in a different context, PEMIRL is conceptually similar to our framework as both its generator and discriminator depend on the inferred context variables.

{\bf Explainable Autonomous Driving.} At the end of this section, we discuss some additional works related to explainable autonomous driving as a complement to those we have mentioned in Sec. \ref{sec:introduction}. They addressed some shortcomings of the discussed approaches, especially those methods based on attention mechanisms. Kim et al. \cite{kim2018textual} trained a textual explanation generator concurrently with a visual-attention-based controller in a supervised manner. It generates sentences explaining the control action as a consequence of certain objects highlighted in the attention map, which can be easily interpreted compared to visual attention. Another issue of attention that has been raised in the literature is causal confusion \cite{de2019causal}. The model does not necessarily assign high attention weights to objects/regions that influence the control actions. In \cite{kim2017interpretable}, a fine-grained decoder was proposed to refine visual attention maps and detect critical regions through causality tests. In \cite{li2020make}, Li et al. adopted a similar idea for object-level reasoning. Causal inference was applied to identify risk objects in driving scenes. One interesting observation was that the detection accuracy was improved with intervention during the training stage, i.e., augmenting the training data by masking out non-causal objects. However, intervention requires explicit prior knowledge on the causal relations to label the causal and non-causal objects in a scene. Similar to intention labels, such labels are generally prohibitive to obtain due to the intricate nature of human cognition.

\section{Background}
\label{sec:background}
In this section, we would like to briefly summarize two algorithms that are closely related to our approach, in order to prepare the readers for the core technical content.

\subsection{Neural Relational Inference (NRI)}\label{subsec:nri}
Kipf et al. \cite{kipf2018neural} represent an interacting system with $N$ objects as a complete bi-directed graph $\mathcal{G}_\mathrm{scene} = (\mathcal{V}, \mathcal{E})$ with vertices $\mathcal{V}=\left\{v_i\right\}_{i=1}^{N}$ and edges $\mathcal{E}=\left\{e_{i,j}=(v_i, v_j) \mid i \neq j \right\}$. The edge $e_{i,j}$ refers to the one pointing from the vertex $v_i$ to $v_j$. Each vertex corresponds to an object in the system. The NRI model is formalized as a VAE with a GNN encoder inferring the underlying interactions and a GNN decoder synthesizing the system dynamics given the interactions.

Formally, the model aims to reconstruct a given state trajectory, denoted by $\mathbf{x}=\left(\mathbf{x}^0,\dots, \mathbf{x}^{T-1}\right)$, where $T$ is the number of timesteps and $\mathbf{x}^t=\left\{\mathbf{x}^t_1,\dots,\mathbf{x}^t_N\right\}$. The vector $\mathbf{x}^t_i\in{\mathbb{R}^n}$ denotes the state vector of object $v_i$ at time $t$. Alternatively, the trajectory can be decomposed into $\mathbf{x}=(\mathbf{x}_1, \dots, \mathbf{x}_N)$, where $\mathbf{x}_i=\left\{\mathbf{x}^0_i,\dots,\mathbf{x}^{T-1}_i\right\}$. The encoder operates over $\mathcal{G}_\mathrm{scene}$, with $\mathbf{x}_i$ as the node feature of $v_i$. It infers the posterior distribution of the edge type ${z}_{i,j}$ for all the edges, collected into a single vector $\mathbf{z}$. The decoder operates over an interaction graph $\mathcal{G}_\mathrm{interact}$ and reconstructs $\mathbf{x}$. The graph $\mathcal{G}_\mathrm{interact}$ is constructed by assigning sampled $\mathbf{z}$ to the edges of $\mathcal{G}_\mathrm{scene}$ and assigning the initial state to the nodes of $\mathcal{G}_\mathrm{scene}$. If $\mathcal{G}_\mathrm{interact}$ represents the interactions sufficiently, the decoder should be able to reconstruct the trajectory accurately.

The model is trained by maximizing the evidence lower bound (ELBO):
\begin{equation*}
\mathcal{L}=\mathbb{E}_{q_\phi(\mathbf{z}\vert\mathbf{x})}\left[\log p_\gamma (\mathbf{x}\vert\mathbf{z})\right]-D_{KL} \left[q_\phi(\mathbf{z}\vert\mathbf{x})\vert\vert p (\mathbf{z})\right],
\end{equation*}
where $q_\phi(\mathbf{z}\vert\mathbf{x})$ is the encoder output which can be factorized as:
\begin{equation}
q_\phi(\mathbf{z}\vert\mathbf{x})=\prod_{i=1}^N\prod_{j=1, j\neq i}^N q_\phi(z_{i,j}\vert \mathbf{x}), \label{eqn:facto}
\end{equation}
where $\phi$ refers to the parameters of the encoder. The decoder output $p_\gamma(\mathbf{x}\vert\mathbf{z})$ can be written as:
\begin{equation*}
p_\gamma(\mathbf{x}\vert\mathbf{z})=\prod_{t=0}^{T-2}p_\gamma(\mathbf{x}^{t+1}\vert{\mathbf{x}^t, \dots, \mathbf{x}^0, \mathbf{z}}),
\end{equation*}
where $\gamma$ refers to the parameters of the decoder.

\subsection{Adversarial Inverse Reinforcement Learning (AIRL)}\label{subsec:airl}
The AIRL algorithm follows the principle of maximum entropy IRL \cite{ziebart2008maximum}. Consider an MDP defined by $(\mathcal{X, A, T}, r)$, where $\mathcal{X, A}$ are the state space and action space respectively. In the rest of the paper, we use $\mathbf{x}$ and $\mathbf{a}$ with any superscript or subscript to represent a state and action in $\mathcal{X}$ and $\mathcal{A}$. $\mathcal{T}$ is the transition operator given by $\mathbf{x}_{t+1}=f(\mathbf{a}_t, \mathbf{x}_t)$\footnote{The transition is assumed deterministic to simplify the notation. A more general form of the algorithm can be derived for stochastic systems, which is essentially the same as the deterministic case.}, and $r:\mathcal{X} \times \mathcal{A}\rightarrow \mathbb{R}$ is the reward function. The maximum entropy IRL framework assumes a suboptimal expert policy $\pi^\mathrm{E}(\mathbf{a}\vert\mathbf{x})$. The demonstration trajectories generated with the expert policy, $\mathcal{D}^\mathrm{E}=\left\{\boldsymbol{\tau}^\mathrm{E}_1, \dots, \boldsymbol{\tau}^\mathrm{E}_M\right\}$ where $\boldsymbol{\tau}^\mathrm{E}_{i}=\left(\mathbf{x}_i^{\mathrm{E}, 0},\mathbf{a}_i^{\mathrm{E}, 0}, \dots, \mathbf{x}_i^{\mathrm{E}, T-1}, \mathbf{a}_i^{\mathrm{E}, T-1}\right)$, have probabilities increasing exponentially with the cumulative reward. Concretely, they follow a Boltzmann distribution:
\begin{equation*}
\boldsymbol{\tau}^\mathrm{E}_i\sim{\pi^\mathrm{E}(\boldsymbol{\tau})} = \frac{1}{Z}\exp\left(\sum_{t=0}^{T-1} r_\lambda(\mathbf{x}_t, \mathbf{a}_t)\right),
\end{equation*}
where $r_\lambda$ is the reward function with parameters denoted by $\lambda$. Maximum entropy IRL aims to infer the underlying reward function parameters of the expert policy. It is formalized as a maximum likelihood problem:
\begin{equation*}
\lambda^* = \mathrm{arg} \max_\lambda \mathbb{E}_{\boldsymbol{\tau}^\mathrm{E}\sim\pi^\mathrm{E}(\boldsymbol{\tau})}\left[\sum_{t=0}^{T-1} r_\lambda(\mathbf{x}^\mathrm{E}_t, \mathbf{a}^\mathrm{E}_t)\right] - \log Z.
\end{equation*}

To derive a feasible algorithm to solve the problem, we need to estimate the partition function $Z$. One practical solution is co-training a policy model with the current estimated reward function through reinforcement learning \cite{finn2016guided}. Finn et al. \cite{finn2016connection} found the equivalency between it and a special form of the generative adversarial network (GAN). The policy model is the generator, whereas a structured discriminator is defined with the reward function to distinguish a generated trajectory $\boldsymbol{\tau}^\mathrm{G}$ from a demonstrated one $\boldsymbol{\tau}^\mathrm{E}$. Fu et al. \cite{fu2017learning} proposed the AIRL algorithm based on it, using a discriminator that identifies generated samples based on the pairs of state and action instead of the entire trajectory to reduce variance:
\begin{equation}
\mathcal{D}_{\lambda,\eta}(\mathbf{x},\mathbf{a})=\frac{\exp\left\{r_\lambda(\mathbf{x},\mathbf{a})\right\}}{\exp\left\{r_\lambda(\mathbf{x},\mathbf{a})\right\}+\pi_\eta(\mathbf{a}\vert\mathbf{x})}, \label{eqn:dis}
\end{equation}
where $\pi_\eta(\mathbf{a}|\mathbf{x})$ is the policy model with parameters denoted by $\eta$. The models $\mathcal{D}_{\lambda,\eta}$ and $\pi_\eta$ are trained adversarially by solving the following min-max optimization problem:
\begin{equation}
\begin{split}
\min_\eta \max_{\lambda} \quad & \mathbb{E}_{\mathbf{x}^\mathrm{E}, \mathbf{a}^\mathrm{E}\sim\pi^\mathrm{E}(\mathbf{x,a})}\left[\log\left(\mathcal{D}_{\lambda,\eta}(\mathbf{x}^\mathrm{E},\mathbf{a}^\mathrm{E})\right)\right] \\
+ & \mathbb{E}_{\mathbf{x}^\mathrm{G}, \mathbf{a}^\mathrm{G}\sim\pi_\eta(\mathbf{x,a})}\left[\log\left(1-\mathcal{D}_{\lambda,\eta}(\mathbf{x}^\mathrm{G},\mathbf{a}^\mathrm{G})\right)\right], \label{eqn:opt}
\end{split}
\end{equation}
where $\pi^\mathrm{E}(\mathbf{x,a})$ denotes the distribution of state and action induced by the expert policy, and $\pi_\eta(\mathbf{x,a})$ is the distribution induced by the learned policy.

\section{Problem Formulation}
\label{sec:formulation}
Our GRI model grounds the relational latent space in a clustering of semantically meaningful interactions by reformulating the relational inference problem into a multi-agent IRL problem. Since the framework has the potential to be generalized to interactive systems in other domains apart from autonomous driving, we will introduce our approach in a general tone. However, it should be noted that we limit our discussion in this paper to autonomous driving problems, without claiming that it can be directly applied to other domains. GRI relies on expert domain knowledge to identify all possible semantic behaviors and design the corresponding reward functions. There exists a broad range of literature on interactive driving behavior modeling \cite{sun2018probabilistic, kesting2010enhanced}, which we can refer to when designing the rewards. We can extend the proposed framework to other fields if proper domain knowledge is available, which is left for future investigation.

We start with modeling the interactive system as a multi-agent MDP with graph representation. As in NRI, the system has an underlying interaction graph $\mathcal{G}_\mathrm{interact}$. The discrete latent variable $z_{i,j}$ takes a value from $\{0, 1, \dots, K-1\}$, where $K$ is the number of interactions. It indicates the type of relation between $v_i$ and $v_j$ with respect to its effect on $v_j$. Additionally, we assume the objects of the system are homogeneous intelligent agents who make decisions based on their interactions with others.

Concretely, each of them is modeled with identical state space $\mathcal{X}$, action space $\mathcal{A}$, transition operator $\mathcal{T}$ and reward function $r:\mathcal{X} \times \mathcal{A}\rightarrow \mathbb{R}$. At time step $t$, the reward of agent $v_j$ depends on the states and actions of itself and the pairwise interactions between itself and all its neighbors:
\begin{equation}
\begin{split}
&r_{\xi, \psi}(v^t_j, \mathbf{z}_j) = r_\xi^{n}(\mathbf{x}^t_j, \mathbf{a}^t_j) \\
& \quad\quad\quad + \sum_{i\in\mathcal{N}_j}\sum_{k=1}^{K-1}\mathbf{1}(z_{i,j}=k) r^{{e},k}_{\psi_k}(\mathbf{x}^t_i, \mathbf{a}^t_i, \mathbf{x}^t_j, \mathbf{a}^t_j), \label{eqn:reward}
\end{split}
\end{equation}
where $\mathbf{z}_j$ is the collection of $\left\{{z}_{i,j}\right\}_{i\in\mathcal{N}_j}$, $r_\xi^{n}$ is the node reward function parameterized by $\xi$, $\mathcal{N}_j$ is the set of $v_j$'s neighbouring nodes, $\mathbf{1}$ is the indicator function, and $r_{\psi_k}^{{e}, k}$ is the edge reward function parameterized by $\psi_k$ for the $k^\mathrm{th}$ type of interaction. We utilize expert domain knowledge to design $r_{\psi_k}^{{e}, k}$, so that the corresponding interactive behavior emerges by maximizing the rewards. In particular, the edge reward equals zero for $k=0$, indicating that the action taken by $v_j$ does not depend on its interaction with $v_i$.

We assume the agents act cooperatively to maximize the cumulative reward of the system:
\begin{equation*}
\begin{split}
\mathcal{R}_{\xi, \psi}(\boldsymbol{\tau},\mathbf{z})&=\sum_{t=0}^{T-1}\mathbf{r}_{\xi, \psi}\left(\mathbf{x}^t, \mathbf{a}^t, \mathbf{z}\right) \\
&=\sum_{t=0}^{T-1}\sum_{j=1}^{N}r_{\xi, \psi}\left(v^t_j, \mathbf{z}_j\right),
\end{split}
\end{equation*}
with a joint policy denoted by $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$. The cooperative assumption is not necessarily valid for generic traffic scenarios \cite{yu2019multi}, but it simplifies the training procedure significantly. We will leave the extension of the proposed method to non-cooperative interactive traffic scenarios as future work.

Given a demonstration dataset, we aim to infer the underlying reward function and policy. Different from a typical IRL problem, both $r_{\xi, \psi}$ and $\pi_{\eta}$ depend on $\mathbf{z}$. Therefore, we need to infer the distribution $p(\mathbf{z}\vert\boldsymbol{\tau})$ to solve the IRL problem.

\section{Grounded Relational Inference}\label{sec:method}
We now present the Grounded Relational Inference model to solve the IRL problem specified in Sec. \ref{sec:formulation}. The model consists of three modules modeled by message-passing GNNs \cite{gilmer2017neural}: an encoder inferring the posterior distribution of edge types, a policy decoder generating control actions conditioned on the edge variables sampled from the posterior distribution, and a reward decoder modeling the rewards conditioned on the inferred edge types.

\subsection{Architecture}
The overall model structure is illustrated in Fig. \ref{fig:architect}. Given a demonstration trajectory $\boldsymbol{\tau}^\mathrm{E}\in\mathcal{D}^\mathrm{E}$, the encoder operates over $\mathcal{G}_\mathrm{scene}$ and approximates the posterior distribution $p(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ with $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. The policy decoder operates over a $\mathcal{G}_\mathrm{interact}$ sampled from the inferred $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ and models the policy $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$. Given an initial state, we can generate a trajectory by sequentially sampling $\mathbf{a}^t$ from $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$ and propagating the state. The state is propagated with either the transition operator $\mathcal{T}$ if given, or a simulating environment if $\mathcal{T}$ is not accessible. We denote a generated trajectory given the initial state of $\boldsymbol{\tau}^\mathrm{E}$ as $\boldsymbol{\tau}^{\mathrm{G}}$. Since these two modules are essentially the same as in NRI, we omit the detailed model structures here and include them in Appx. \ref{app:model}.

\begin{figure*}[t]
\centering
\includegraphics[width=6.6in]{GRI-framework.pdf}
\caption{Architecture of grounded relational inference model. Given a demonstration trajectory $\boldsymbol{\tau}^\mathrm{E}\in\mathcal{D}^\mathrm{E}$, the encoder operates over $\mathcal{G}_\mathrm{scene}$ and approximates the distribution $p(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ with $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. The policy decoder operates over a $\mathcal{G}_\mathrm{interact}$ sampled from the inferred $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$ and models the policy $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$. Given the initial state of $\boldsymbol{\tau}^\mathrm{E}$, we sample a trajectory $\boldsymbol{\tau}^\mathrm{G}$ by sequentially sampling $\mathbf{a}^t$ from $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$ and propagating the state. Finally, we use the reward GNN to compute the cumulative rewards of $\boldsymbol{\tau}^\mathrm{G}$ and $\boldsymbol{\tau}^\mathrm{E}$ conditioned on the sampled $\mathcal{G}_\mathrm{interact}$.} \label{fig:architect}
\end{figure*}

The reward decoder computes the reward of a state-action pair given the sampled edge variables. We use it to compute the cumulative rewards of $\boldsymbol{\tau}^\mathrm{G}$ and $\boldsymbol{\tau}^\mathrm{E}$ conditioned on the sampled $\mathcal{G}_\mathrm{interact}$. The reward decoder is in the form of Eqn. (\ref{eqn:reward}). Additionally, we augment the functions $r^n_\xi$ and $r^{e,k}_{\psi_k}$ with MLP shaping terms to mitigate the reward shaping effect \cite{fu2017learning}, resulting in:
\begin{equation}
f^n_{\xi,\omega}(\mathbf{x}^t_j, \mathbf{a}^t_j, \mathbf{x}^{t+1}_j) = r^n_{\xi}(\mathbf{x}^t_j, \mathbf{a}^t_j)+h^n_\omega(\mathbf{x}^{t+1}_j)-h^n_\omega(\mathbf{x}^t_j), \label{eqn:node_reward}
\end{equation}
and
\begin{equation}
\begin{split}
& f^{e,k}_{\psi_k, \chi_k}(\mathbf{x}^t_i, \mathbf{a}^t_i, \mathbf{x}^{t+1}_i, \mathbf{x}^t_j, \mathbf{a}^t_j, \mathbf{x}^{t+1}_j) = r^{{e},k}_{\psi_k}(\mathbf{x}^t_i, \mathbf{a}^t_i, \mathbf{x}^t_j, \mathbf{a}^t_j)\\
&\quad \quad \quad + h^{e,k}_{\chi_k}(\mathbf{x}^{t+1}_i, \mathbf{x}^{t+1}_j)-h^{e,k}_{\chi_k}(\mathbf{x}^{t}_i, \mathbf{x}^{t}_j), \label{eqn:edge_reward}
\end{split}
\end{equation}
where $h^n_{\omega}$ and $h^{e,k}_{\chi_k}$ are MLPs with parameters denoted by $\omega$ and $\chi_k$ respectively. We denote the shaped reward function of agent $v_j$ by $\mathbf{f}_{\xi,\omega,\psi,\chi}\left(\mathbf{x}^t, \mathbf{a}^t,\mathbf{x}^{t+1},\mathbf{z}\right)$, which equals the right-hand side of Eqn. (\ref{eqn:reward}) but with $r^n_\xi$ and $r^{e,k}_{\psi_k}$ substituted by the augmented rewards. The shaped reward function together with the policy model defines the discriminator which distinguishes $\boldsymbol{\tau}^\mathrm{G}$ from $\boldsymbol{\tau}^\mathrm{E}$:
\begin{equation*}
\begin{split}
&\mathcal{D}_{\xi, \omega, \psi, \chi, \eta}(\mathbf{x}^t, \mathbf{a}^t, \mathbf{x}^{t+1}, \mathbf{z}) \\
&\quad\quad\quad =\frac{\exp\left\{\mathbf{f}_{\xi,\omega,\psi,\chi}\left(\mathbf{x}^t, \mathbf{a}^t,\mathbf{x}^{t+1},\mathbf{z}\right)\right\}}{\exp\left\{\mathbf{f}_{\xi,\omega,\psi,\chi}\left(\mathbf{x}^t, \mathbf{a}^t, \mathbf{x}^{t+1}, \mathbf{z}\right)\right\}+\boldsymbol{\pi}_\eta\left(\mathbf{a}^t\vert \mathbf{x}^t, \mathbf{z}\right)}.
\end{split}
\end{equation*}

\subsection{Training}
We aim to train the three modules simultaneously. Consequently, we incorporate the encoder model $q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)$ into the objective function of AIRL, resulting in the optimization problem (\ref{eqn:opt-2}). The encoder is integrated into the minimization problem because the reward function has a direct dependence on the latent space. The model is then trained by solving problem (\ref{eqn:opt-2}) in an adversarial scheme: we alternate between training the encoder and reward for the minimization problem and training the policy for the maximization problem. Specifically, the objective for the encoder and reward is the following minimization problem given fixed $\eta$:
\begin{equation}
\begin{aligned}
\min_{\xi, \omega, \psi, \chi, \phi}\quad & \mathcal{J}(\xi, \omega, \psi, \chi, \phi, \eta) \\
\textrm{s.t.}\quad & \mathbb{E}\left\{D_{KL}\left[q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)\vert\vert p(\mathbf{z})\right]\right\}\leqslant I_c. \label{eqn:min}
\end{aligned}
\end{equation}
The objective for the policy is maximizing $\mathcal{J}(\xi, \omega, \psi, \chi, \phi, \eta)$ with fixed $\xi, \omega, \psi, \chi$ and $\phi$.

\begin{figure*}[t]
\begin{equation}
\begin{split}
\max_\eta \min_{\xi, \omega, \psi, \chi, \phi}\quad & \mathcal{J}(\xi, \omega, \psi, \chi, \phi, \eta)=\mathbb{E}_{\boldsymbol{\tau}^\mathrm{E}\sim\boldsymbol{\pi}^\mathrm{E}(\boldsymbol{\tau})}\Bigg\{\mathbb{E}_{\mathbf{z}\sim{q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)}}\bigg[-\sum_{t=0}^{T-1}\log \mathcal{D}_{\xi,\omega,\psi,\chi,\eta}(\mathbf{x}^{\mathrm{E}, t}, \mathbf{a}^{\mathrm{E},t}, \mathbf{x}^{\mathrm{E}, t+1},\mathbf{z}) \\
&\qquad\qquad\qquad\quad\ -\mathbb{E}_{\boldsymbol{\tau}^\mathrm{G}\sim\boldsymbol{\pi}_\eta(\boldsymbol{\tau}\vert \mathbf{z})}\sum_{t=0}^{T-1}\log \left(1-\mathcal{D}_{\xi, \omega, \psi, \chi, \eta}(\mathbf{x}^{\mathrm{G}, t}, \mathbf{a}^{\mathrm{G},t}, \mathbf{x}^{\mathrm{G}, t+1}, \mathbf{z})\right)\bigg]\Bigg\}, \\
\textrm{s.t.}\quad & \mathbb{E}_{\boldsymbol{\tau}^\mathrm{E}\sim\boldsymbol{\pi}^\mathrm{E}(\boldsymbol{\tau})}\left\{D_{KL}\left[q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)\vert\vert p(\mathbf{z})\right]\right\}\leqslant I_c, \label{eqn:opt-2}
\end{split}
\end{equation}
\hrulefill
\end{figure*}

The objective function in the problem (\ref{eqn:opt-2}) is essentially the expectation of the objective function in the problem (\ref{eqn:opt}) over the inferred posterior distribution $q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)$ and the demonstration distribution $\boldsymbol{\pi}^\mathrm{E}\left(\boldsymbol{\tau}\right)$. The constraint enforces an upper bound $I_c$ on the KL-divergence between $q_\phi\left(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E}\right)$ and the prior distribution $p(\mathbf{z})$. A sparse prior is chosen to encourage sparsity in $\mathcal{G}_\mathrm{interact}$. It has a regularization effect similar to that of the $D_{KL}$ term in ELBO. We borrow its format from variational discriminator bottleneck (VDB) \cite{peng2018variational}. VDB improves adversarial training by constraining the information flow from the input to the discriminator. The KL-divergence constraint is derived as a variational approximation to the information bottleneck \cite{alemi2016deep}. Although our motivation is different, we adopt it for two reasons. First, the proposed model is not generative because our goal is not synthesizing trajectories from the prior $p(\mathbf{z})$, but inferring the posterior $p\left(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E}\right)$. Therefore, regularization derived from information bottleneck is more sensible compared to ELBO. Second, the constrained problem (\ref{eqn:min}) can be relaxed by introducing a Lagrange multiplier $\beta$. During training, $\beta$ is updated through dual gradient descent as follows:
\begin{equation}
\beta \leftarrow \max\left(0, \beta + \alpha_\beta\left(\mathbb{E}\left\{D_{KL}\left[q_\phi\left(\mathbf{z}\vert \boldsymbol{\tau}^\mathrm{E}\right)\vert\vert p(\mathbf{z})\right]\right\}- I_c \right) \right). \label{eqn:adapt}
\end{equation}
We find the adaptation scheme particularly advantageous. The model can focus on inferring $\mathbf{z}$ for reward learning after satisfying the sparsity constraint, because the magnitude of $\beta$ decreases towards zero once the constraint is satisfied. However, it is worth noting that our framework does not rely on the bottleneck constraint to induce a semantically meaningful latent space as in \cite{higgins2016beta}. In contrast, GRI relies on the structured reward functions to ground the latent space into semantic interactive behaviors. The bottleneck serves as a regularization to find out the minimal interaction graph to represent the interactions. In fact, we trained the baseline NRI models with the same constraints and weight update scheme. The experimental results show that the constraint itself is not sufficient to induce a sparse interaction graph.

In general, when the dynamics $\mathcal{T}$ is unknown or non-differentiable, maximum entropy RL algorithms \cite{levine2018reinforcement} are adopted to optimize the policy. In this work, we assume known and differentiable dynamics, which is a reasonable assumption for the investigated scenarios. It allows us to directly backpropagate through the trajectory for gradient estimation, which simplifies the training procedure.

\section{Experiments}
\label{sec:experiments}
We evaluate the proposed GRI model on a synthetic dataset as well as a naturalistic traffic dataset. The synthetic data are generated using policy models trained given the ground-truth reward function and interaction graph. We intend to verify if GRI can induce a semantically meaningful relational latent space and infer the underlying relations precisely. The naturalistic traffic data are extracted from the NGSIM dataset. We aim to validate if GRI can model real-world traffic scenarios effectively with the grounded latent space. Unlike synthetic agents, we do not have the privilege to access the ground-truth graphs governing human drivers' interactions. Instead, we construct hypothetical graphs after analyzing the segmented data. The hypotheses reflect humans' understanding of the traffic scenarios. \textcolor{black}{Moreover, the hypothetical graphs are built upon a set of interactive behaviors whose characteristics are described by the designed reward functions. We would like to see if the reward functions can incorporate the semantic information into the latent space, and let GRI model real-world interactive systems in the same way as humans.} In each setting, we consider two traffic scenarios, car-following and lane-changing.

\subsection{Baselines} \label{sec:baseline}
The main question of interest is whether GRI can induce semantically meaningful interaction graphs. To answer the question, the most important baseline model for comparison is NRI, because GRI shares the same prior distribution of latent variables with NRI. Comparing the posterior distributions provides insights on whether the structured reward functions can ground the latent space into semantic interactive behaviors. In each experiment, the baseline NRI model has the same encoder and policy decoder as the GRI model. Besides, as stated in Sec. \ref{sec:method}, the same bottleneck constraint and the weight update scheme in Eqn. (\ref{eqn:adapt}) were applied as regularization for minimal representation.

Another model for comparison is a supervised policy decoder. We assume that the ground-truth graphs or human hypotheses are available. Therefore, we can directly train a policy decoder in a supervised way. The ground-truth graph is fed to the policy decoder as a substitute for the interaction graph sampled from the encoder output $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. The training of the decoder becomes a simple regression problem. We used mean square error as the loss function to train it.

As additional information is granted, it is unfair to directly compare the performance of GRI with the supervised policy model. Since the supervised model is trained with the ground-truth interaction graphs governing the systems, it is expected to achieve smaller reconstruction error. However, the supervised baseline provides some useful insights. In the naturalistic traffic scenarios, the supervised model gives us some insights into whether the human hypotheses are reasonable. If the supervised model can reconstruct the trajectories precisely, it will justify our practice to adopt graph accuracy as one of the evaluation metrics.

\textcolor{black}{More importantly, in Sec. \ref{sec:ood}, we demonstrate that GRI's latent space still maintains its semantic meaning under some perturbations to the initial states, whereas the decoders of baseline models fail to synthesize those behaviors under the same perturbations, including the supervised policy decoder which is trained with the ground-truth interaction graphs. It supports our argument that direct supervision via interaction labels is not sufficient to guide the policy to synthesize behaviors with correct semantic meaning.}

There exist other alternatives for the purpose of trajectory reconstruction. However, it is not our goal in this paper to find an expressive model for accurate reconstruction. Therefore, we do not consider other baselines from this perspective. For the task of grounding the latent space into semantic interactive driving behaviors, we did not find any exact alternatives in the literature. \textcolor{black}{For the specific scenarios studied in this paper, we may design some rule-based approaches to directly infer the interaction graph. However, it is difficult to decide the parameters that best describe the interactive behaviors, because there is a spectrum in how people follow the rules \cite{8814167}. In this paper, we are interested in a data-driven module that can be incorporated into an end-to-end learning model, and has the potential to be generalized to complicated driving scenarios and systems in other domains. Apart from GRI, a potential alternative solution could be adopting a differentiable logic module. For instance, Leung et al. \cite{8814167} proposed a differentiable parametric Signal Temporal Logic formula (pSTL) which could be learnt from data. We will investigate along this direction in our future works.}

\begin{figure*}[t]
\centering
\includegraphics[width=7in]{scene.pdf}
\caption{Test scenarios with the underlying interaction graphs. In the synthetic scenarios, the graphs are the ground-truth ones governing the synthetic experts. In the naturalistic traffic scenarios, the graphs are human hypotheses reflecting humans' understanding of the traffic scenarios.} \label{fig:scene}
\end{figure*}

\subsection{Evaluation Metrics}
To evaluate a trained model, we sample a $\boldsymbol{\tau}^\mathrm{E}$ from the test dataset and extract the maximum posterior probability (MAP) estimate of edge variables, $\hat{\mathbf{z}}$, from $q_\phi(\mathbf{z}\vert\boldsymbol{\tau}^\mathrm{E})$. Afterward, we obtain a single sample of trajectories $\hat{\boldsymbol{\tau}}$ by executing the mean value of the policy output. The root mean square errors (RMSE) of states and the accuracy of $\mathcal{G}_\mathrm{interact}$ are selected as the evaluation metrics, which are computed based on $\hat{\mathbf{z}}$, $\hat{\boldsymbol{\tau}}$, $\boldsymbol{\tau}^\mathrm{E}$, and the ground truth or hypothetical latent variables denoted by $\mathbf{z}^\mathrm{E}$:
\begin{equation*}
\begin{split}
\mathrm{RMSE}_\epsilon &= \sqrt{\frac{1}{(N-1)T}\sum_{j=1}^{N}\sum_{t=0}^{T-1}(\epsilon^{\mathrm{E},t}_j-\hat{\epsilon}^t_j)^2}, \\
\mathrm{Accuracy} &= \frac{\sum_{i=1}^{N} \sum_{j=1, j\neq i}^{N}\mathbf{1}(z^{\mathrm{E}}_{i,j}=\hat{z}_{i,j})}{N(N-1)}.
\end{split}
\end{equation*}
If multiple edge types exist, we test all the possible permutations of edge types and report the one with the highest graph accuracy for NRI.

\textcolor{black}{It is worth noting that the graph accuracy on the naturalistic traffic dataset merely quantifies the divergence between the inferred graphs and the hypotheses we construct. We anticipate that GRI can attain a higher accuracy than NRI. It will imply that we can incorporate human domain knowledge into GRI and induce a semantic relational latent space consistent with the hypotheses built upon the same domain knowledge. However, a low graph accuracy does not necessarily mean that humans cannot interpret the inferred graphs well. The hypothetical graphs represent one perspective to interpret the interactive scenes. It is possible that NRI may find another sensible way to categorize and interpret the interactions, which can also be understood by humans.}

\textcolor{black}{To further study the explainability of the learned latent spaces, we want to look into the inferred graphs and have a qualitative comparison between the latent spaces learned by the two models. For each setting, we compute the distribution of estimated edge variables $\hat{\mathbf{z}}$ over the test dataset. As in \cite{kipf2018neural}, we visualize the results in multiple adjacency matrices corresponding to different edge types. In the adjacency matrix corresponding to the $k^\mathrm{th}$ type of interaction, the element $A_{i,j}$ indicates the relative frequency of $\hat{z}_{j,i}=k$, where $\hat{z}_{j,i}$ is the latent variable for the edge from node $j$ to node $i$. In other words, $A_{i,j}$ equals the ratio of test samples where the model infers $\hat{z}_{j,i}=k$. By inspecting the edge type distributions, we can get some extra insights into the explainability of the two models beyond the quantitative metrics.}

\subsection{Synthetic Scenes}\label{sec:synthetic}

\begin{table*}[t]
\centering
\caption{Performance Comparison on Synthetic Dataset}
\label{table:synthetic}
\begin{threeparttable}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Car Following ($\Delta t=0.2s$, $T=20$)} & \multicolumn{4}{c|}{Lane Changing ($\Delta t=0.2s$, $T=30$)} \\ \cline{2-8}
& $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ & $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_y(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ \\ \hline GRI & $0.241\pm{0.125}$ & $0.174\pm{0.068}$ & $\mathbf{100.00\pm{0.00}}$ & \textcolor{black}{$0.529\pm{0.230}$} & \textcolor{black}{$0.207\pm{0.046}$} & \textcolor{black}{$0.303\pm{0.128}$} & \textcolor{black}{$\mathbf{99.95\pm{0.01}}$} \\ \hline NRI & ${0.047\pm{0.024}}$ & ${0.056\pm{0.015}}$ & $66.70\pm{0.00}$ & \textcolor{black}{${0.109\pm{0.045}}$} & \textcolor{black}{${0.155\pm{0.038}}$} & \textcolor{black}{${0.061\pm{0.016}}$} & \textcolor{black}{$55.9\pm{7.98}$} \\ \hline Supervised & $\mathbf{0.039\pm{0.016}}$ & $\mathbf{0.050\pm{0.009}}$ & - & \textcolor{black}{$\mathbf{0.062\pm{0.027}}$} & \textcolor{black}{$\mathbf{0.145\pm{0.035}}$} & \textcolor{black}{$\mathbf{0.048\pm{0.011}}$} & -\\ \hline
\end{tabular}
\begin{tablenotes}
\item[1] The data is presented in form of $\text{mean}\pm{\text{std}}$.
\end{tablenotes}
\end{threeparttable}
\end{table*}

As mentioned above, we designed two synthetic scenarios, car-following and lane-changing. The two scenes and their underlying interaction graphs are illustrated in Fig. \ref{fig:scene}. In both scenarios, we have a leading vehicle whose behavior does not depend on the others. Its trajectory is given without the need for reconstruction. We assume it runs at constant velocity. The other vehicles interact with each other and the leader in different ways. In the car-following scene, we model the system with two types of edges: $z_{i,j}=1$ means that Vehicle $j$ follows Vehicle $i$; $z_{i,j}=0$ means that Vehicle $j$ does not interact with Vehicle $i$. In the lane-changing scene, two additional edge types are introduced: $z_{i,j}=2$ means that Vehicle $j$ yields to Vehicle $i$; $z_{i,j}=3$ means that Vehicle $j$ cuts in front of Vehicle $i$.

The MDPs for the tested scenarios are specified as follows. In the car-following scene, since the vehicles mainly interact in longitudinal direction, we only model their longitudinal dynamics to simplify the problem. For all $j\in\{1,2,3\}$, the state vector of Vehicle $j$ consists of three states: $\mathbf{x}^t_{j}=\left[x^t_j\ v^t_j\ a^t_j\right]^\intercal$, where $x^{t}_j$ is the longitudinal coordinate, $v^{t}_j$ is the velocity, and $a^t_j$ is the acceleration. There is only one control input which is the jerk. We denote it as $\delta a^t_j$. The dynamics is governed by a 1D point-mass model:
\begin{align*}
x^{t+1}_j &= x^t_j + v^t_j\Delta t + \frac{1}{2}a^t_j{\Delta t}^2,\\
v^{t+1}_j &= v^t_j + a^t_j\Delta t, \\
a^{t+1}_j &= a^t_j + \delta a^t_j\Delta t,
\end{align*}
where $\Delta t$ is the sampling time. In the lane-changing scene, we consider both longitudinal and lateral motions. The state vector consists of six states instead: $\mathbf{x}^t_{j}=\left[x^t_j\ y^t_j\ v^t_j\ \theta^t_j\ a^t_j\ \omega^t_j\right]^\intercal$. The three additional states are the lateral coordinate $y^{t}_j$, the yaw angle $\theta^t_j$, and the yaw rate $\omega^t_j$. There is one additional action which is the yaw acceleration, denoted by $\delta \omega^t_j$. We model the vehicle as a Dubins' car:
\begin{align*}
x^{t+1}_j &= x^t_j + v^t_j\cos(\theta^t_j)\Delta t, \\
y^{t+1}_j &= y^t_j + v^t_j\sin(\theta^t_j)\Delta t, \\
v^{t+1}_j &= v^t_j + a^t_j\Delta t, \\
\theta^{t+1}_j &= \theta^{t}_j + \omega^t_j\Delta t, \\
a^{t+1}_j &= a^t_j + \delta a^t_j\Delta t, \\
\omega^{t+1}_j &= \omega^{t}_j + \delta \omega^t_j\Delta t.
\end{align*}

The structured reward functions were designed based on expert domain knowledge (e.g. transportation studies \cite{kesting2010enhanced, treiber2000congested}). We mainly referred to \cite{sun2018probabilistic, naumann2020analyzing} in this paper. For the car-following behavior, its reward function is defined as follows:
\begin{equation*}
\begin{split}
r^{e,1}_{\psi_1} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{1,0})\right) g_\mathrm{IDM}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{1,1})\right) g_\mathrm{dist}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{1,2})\right) g_\mathrm{lat}(\mathbf{x}^t_i, \mathbf{x}^t_j),
\end{split}
\end{equation*}
where the features are defined as:
\begin{align}
g_\mathrm{IDM}(\mathbf{x}^t_i, \mathbf{x}^t_j) & = \left(\max{\left(x^t_i-x^t_j, 0\right)}-\Delta x^{\mathrm{IDM},t}_{i,j}\right)^2, \label{eqn:fIDM}\\
g_\mathrm{dist}(\mathbf{x}^t_i, \mathbf{x}^t_j) & = \exp\left(-\frac{\left(\max{\left(x^t_i-x^t_j, 0\right)}\right)^2}{\zeta^2}\right), \label{eqn:fdist}\\
g_\mathrm{lat}(\mathbf{x}^t_i, \mathbf{x}^t_j) & =\left(y^t_j - g_\mathrm{center}(y^t_i)\right)^2.\nonumber
\end{align}
The feature $g_\mathrm{IDM}$ suggests a spatial headway $\Delta x^{\mathrm{IDM},t}_{i,j}$ derived from the intelligent driver model (IDM) \cite{kesting2010enhanced}. The feature $g_\mathrm{dist}$ ensures a minimum collision-free distance. We penalize the following vehicle for surpassing the preceding one with the help of $x^{\mathrm{IDM},t}_{i,j}$ in Eqn. (\ref{eqn:fIDM}) and Eqn. (\ref{eqn:fdist}). The last feature $g_\mathrm{lat}$ exists only in lane-changing. It regulates the following vehicle to stay in the same lane as the preceding one with the help of $g_\mathrm{center}$, which determines the lateral coordinate of the corresponding centerline based on the position of the preceding vehicle. \textcolor{black}{Altogether, the features define the following behavior as staying in the same lane as the preceding vehicle while keeping a safe longitudinal headway.}

The reward function for yielding is defined as:
\begin{equation*}
\begin{split}
r^{e,2}_{\psi_2} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{2,0})\right) g_\mathrm{yield}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{2,1})\right) g_\mathrm{dist}(\mathbf{x}^t_i, \mathbf{x}^t_j).
\end{split}
\end{equation*}
The feature $g_\mathrm{dist}$ is defined in Eqn. (\ref{eqn:fdist}). The other feature $g_\mathrm{yield}$ suggests an appropriate spatial headway for yielding:
\begin{align}
g_\mathrm{yield}(\mathbf{x}^t_i, \mathbf{x}^t_j) = & \mathbf{1}\left(g_\mathrm{center}(y^t_j)=g_\mathrm{center}(y^t_i)\right)g_\mathrm{IDM}(\mathbf{x}^t_i, \mathbf{x}^t_j)\nonumber \\
+ & \mathbf{1}\left(g_\mathrm{center}(y^t_j)\neq g_\mathrm{center}(y^t_i)\right)g_\mathrm{goal}(\mathbf{x}^t_i, \mathbf{x}^t_j), \nonumber\\
g_\mathrm{goal}(\mathbf{x}^t_i, \mathbf{x}^t_j) = & \left(\max{\left(x^t_i-x^t_j-\Delta x^{\mathrm{yield}}, 0\right)}\right)^2. \label{eqn:fgoal}
\end{align}
The suggested headway is set to be a constant value, $\Delta x^{\mathrm{yield}}$, when the other vehicle is merging, and switches to $\Delta x^{\mathrm{IDM},t}_{i,j}$ once the merging vehicle enters into the same lane, where its behavior becomes consistent with car following. \textcolor{black}{We follow \cite{sun2018probabilistic} to adopt different reward functions depending on the lanes where the vehicles are located. Merging occurs during a short period of time. Therefore, we assume the driver sets a fixed short-term goal distance as in \cite{sun2018probabilistic} and then transits to following behavior afterwards.}

The reward function for cutting-in is quite similar:
\begin{equation*}
\begin{split}
r^{e,3}_{\psi_3} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{3,0})\right) g_\mathrm{goal}(\mathbf{x}^t_j, \mathbf{x}^t_i) \\
& - \left(1+\exp(\psi_{3,1})\right) g_\mathrm{dist}(\mathbf{x}^t_j, \mathbf{x}^t_i),
\end{split}
\end{equation*}
where the features are defined as in Eqn. (\ref{eqn:fdist}) and Eqn. (\ref{eqn:fgoal}), but with the input arguments switched, because the merging vehicle should stay in front of the yielding one.

Apart from the edge rewards, all the agents share the same node reward function. The following one is adopted for lane-changing:
\begin{equation*}
\begin{split}
r^n_\xi (\mathbf{x}^t_j, \mathbf{a}^t_j)=&-\left(1+\exp(\xi_0)\right)f_v(\mathbf{x}_j^t)\\
&-\left(1+\exp({\xi}_{1:3})\right)^\intercal {f}_\mathrm{state}(\mathbf{x}^t_j)\\
&-\left(1+\exp({\xi}_{4:5})\right)^\intercal {f}_\mathrm{action}(\mathbf{a}^t_j)\\
&-\left(1+\exp(\xi_{6})\right)f_\mathrm{lane}(\mathbf{x}_j^t),
\end{split}
\end{equation*}
where $f_\mathrm{state}$ and $f_\mathrm{action}$ take the element-wise square of $\left[a^t_j\ \theta^t_j\ \omega^t_j \right]$ and $\left[\delta a^t_j\ \delta\omega^t_j \right]$ respectively. \textcolor{black}{It penalizes large control inputs as well as drastic longitudinal and angular motions to induce smooth and comfortable maneuver.} The feature $f_v$ is the squared error between $v^t_j$ and the speed limit $v_\mathrm{lim}$. \textcolor{black}{It regulates the vehicles to obey the speed limit.} The last term $f_\mathrm{lane}$ penalizes the vehicle for staying close to the lane boundaries. For car-following, we simply remove those terms that are irrelevant in 1D motion.
In all the reward functions, the parameters collected in $\psi$ and $\xi$ are unknown during training and inferred by GRI. We take the exponents of them and add one to the results. It enforces the model to use the features when modeling the corresponding interactions.

With the scenarios defined above, we aim to generate one dataset for each scenario. For each scenario, we randomly sampled the initial states of the vehicles and trained an expert policy given the ground-truth reward functions and the interaction graph. Afterwards, we used the trained policy to generate the dataset. The same sampling scheme was used to sample the initial states.

{\bf Results.} On each dataset, we trained a GRI model with the policy decoder (\ref{eqn:policy1})-(\ref{eqn:policy3}) introduced in Appx. \ref{app:model}. The results are summarized in Table \ref{table:synthetic}. The NRI model can reconstruct the trajectories with errors close to the supervised policy. However, it learns a relational latent space that is different from the one governing the demonstration; Therefore, the edge variables cannot be interpreted as those semantic interactive behaviors. In contrast, our GRI model interprets the interactions consistently with the domain knowledge inherited in the demonstration, and recovers the interaction graph with high accuracies. It has larger reconstruction errors compared to the baseline approaches. However, it still sufficiently recovers the interactive behaviors, and the reconstructed trajectories are sensible (see Appx. \ref{app:visual}).

We computed the empirical distribution of the estimated edge variables $\hat{z}$ over the test dataset. The results are summarized in Fig. \ref{fig:sim_graph}. The distribution concentrates into a single interaction graph for both models in both scenarios\textemdash as opposed to the case on the naturalistic traffic dataset introduced in the next section\textemdash because the synthetic agents have consistent interaction patterns over all the samples. \textcolor{black}{We observe that NRI learns symmetric relations: In both scenarios, the NRI model assigns the same edge types to the edges $e_{0,1}$ and $e_{1,0}$. It is difficult to interpret their semantic meaning because those pairwise interactions are asymmetric in our synthetic scenes. In contrast, the reward functions in our GRI model enforce an asymmetric relational latent space.}

\begin{figure}[t]
\centering
\includegraphics[width=3.2in]{synthetic_graph.pdf}
\caption{The empirical distribution of estimated edge variables $\hat{z}$ over the test dataset in the synthetic scenarios. We summarize the results in multiple adjacency matrices corresponding to different edge types. In the adjacency matrix corresponding to the $k^\mathrm{th}$ type of interaction, the element $A_{i,j}$ indicates the relative frequency of $\hat{z}_{j,i}=k$, where $\hat{z}_{j,i}$ is the latent variable for the edge from node $j$ to node $i$. } \label{fig:sim_graph}
\end{figure}

\subsection{Naturalistic Traffic Scenes}
To evaluate the proposed method in real-world traffic scenarios, we investigated the same scenarios as in the synthetic case, car-following and lane-changing. We segmented data from the Highway-101 and I-80 datasets of NGSIM. Afterwards, we further screened the data to select those interactive samples and ensure that no erratic swerving or multiple lane changes occur. Unlike the synthetic agents, human agents do not have a ground-truth interaction graph that governs their interactions. Instead, we constructed hypothetical $\mathcal{G}_\mathrm{interact}$ after analyzing the segmented data. The hypotheses for the two scenarios are depicted in Fig. \ref{fig:scene}. The one for car-following is identical to the ground-truth interaction graph we designed for the synthetic agents. However, we proposed a different hypothesis for lane-changing. We excluded the cutting-in relation to reduce the number of edge types and therefore simplify the training procedure. Moreover, we differentiated distinct interactions according to the vehicles' lateral position. We say that a vehicle yields to its preceding vehicle if they drive in neighbouring lanes, whereas it follows the preceding one if they drive in the same lane.

\textcolor{black}{As in the synthetic scenes, the trajectory of the leading vehicle is given without the need for reconstruction. We feed the ground-truth state of the leading vehicle sequentially to the policy decoder when decoding the trajectories of the other vehicles. This practice enables us to heuristically isolate a small interacting group out of the large number of vehicles on the highway. While the leading vehicle's behavior depends on the other vehicles, it is fairly reasonable to assume that the behavior of the modeled following vehicles is independent from other surrounding vehicles on the road after conditioning on the trajectory of the leading vehicle. Even though there may still exist other surrounding vehicles interacting with them, their influence should be subtle. The models should be able to well capture the interactions among the modeled subset while marginalizing out those subtle effects.}

The node dynamics is the same as in the synthetic scene for car-following. For lane-changing, since we did not have accurate heading information, we adopted a 2D point-mass model instead. Since the behavior of human drivers is much more complicated than the synthetic agents, we designed reward functions with larger model capacity using neural networks. In car-following, the reward functions are defined as follows:
\begin{align*}
r^{e,1}_{\psi_1} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{1,0})\right) g_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{1,1})\right) g_\mathrm{s}^\mathrm{NN}(\mathbf{x}^t_i,\mathbf{x}^t_j),\\
r^n_\xi \left(\mathbf{x}^t_j, \mathbf{a}^t_j\right) = &-\left(1+\exp(\xi_0)\right)f_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_j) \\
&- \left(1+\exp(\xi_1)\right)f_\mathrm{acc}(\mathbf{x}^t_j)\\
&- \left(1+\exp(\xi_2)\right)f_\mathrm{jerk}(\mathbf{x}^t_j, \mathbf{a}^t_j),
\end{align*}
where the features are defined as:
\begin{align*}
f_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_j) & = \left(v^t_{j}-h_1(\mathbf{x}^t_j)\right)^2,\\
g_\mathrm{v}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) & = \left(v^t_{j}-h_2(\mathbf{x}^t_i, \mathbf{x}^t_j)\right)^2,\\
g_\mathrm{s}^\mathrm{NN}(\mathbf{x}^t_i,\mathbf{x}^t_j) & = \mathrm{ReLU} {\left(h_3\left(\mathbf{x}^t_i, \mathbf{x}^t_j\right)-x^t_i+x^t_j\right)}^2.
\end{align*}
The features $f_\mathrm{acc}$ and $f_\mathrm{jerk}$ penalize the squared magnitude of acceleration and jerk \textcolor{black}{to induce smooth and comfortable maneuver}. The functions $h_1$, $h_2$ and $h_3$ are neural networks with ReLU output activation. The feature $g^\mathrm{NN}_\mathrm{s}$ is the critical component which shapes the car-following behavior. It learns a non-negative reference headway and penalizes the following vehicle for violating it. The features $g_\mathrm{v}^\mathrm{NN}$ and $f_\mathrm{v}^\mathrm{NN}$ suggest reference velocities considering interaction and merely itself respectively. \textcolor{black}{The edge reward function has large modeling capacity because we let it learn adaptive reference headway and velocity from data. Nevertheless, it still defines the fundamental characteristic of the following behavior, which is always staying behind the preceding vehicle.}

\textcolor{black}{In lane-changing, the node reward function and the edge reward function for the following behavior are similar to those in the car-following scenario. The node reward function has an additional term for lateral position, which encourages the vehicles to drive on the target lane, i.e., the lane where the leading vehicle is driving. It also has additional terms to penalize the magnitude of lateral velocity and acceleration to induce comfortable maneuver.} To design the yielding reward, we define a collision point of two vehicles based on their states. We approximate the vehicles' trajectories as piecewise-linear between sequential timesteps, and compute the collision point as the intersection between their trajectories (Fig. \ref{fig:poc_viz}). We threshold the point if it exceeds a hard-coded range of interest (e.g. if it is behind the vehicles or greater than a certain distance). Afterwards, we define the distance-to-collision ($d_{poc}$) as the longitudinal distance from the vehicle to the collision point, and the time-to-collision ($T_{col}$) as the time to reach the collision point calculated by dividing $d_{poc}$ by the velocity of the vehicle. Then the yielding reward function is defined as follows:
\begin{equation*}
\begin{split}
r^{e,2}_{\psi_2} \left(\mathbf{x}^t_i, \mathbf{x}^t_j\right) = & - \left(1+\exp(\psi_{2,0})\right)g_\mathrm{spatial}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) \\
& - \left(1+\exp(\psi_{2,1})\right)g_\mathrm{time}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j),
\end{split}
\end{equation*}
where
\begin{equation*}
\begin{split}
g_\mathrm{spatial}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) &= \mathrm{ReLU}{\left((x_{j}-x_{poc})-h_\mathrm{d_{poc}}(\mathbf{x}^t_i, \mathbf{x}^t_j)\right)}^2,\\
g_\mathrm{time}^\mathrm{NN}(\mathbf{x}^t_i, \mathbf{x}^t_j) &=
\mathrm{ReLU}{\left(h_\mathrm{T_{col}}(\mathbf{x}^t_i, \mathbf{x}^t_j)-(T_{col_{i}}-T_{col_{j}})\right)}^2.
\end{split}
\end{equation*}
The functions $h_\mathrm{d_{poc}}$ and $h_\mathrm{T_{col}}$ are neural networks with ReLU output activation. The $g_\mathrm{spatial}^\mathrm{NN}$ term learns a spatial aspect of the yield behavior and compares the agent's distance from the estimated collision-point with the NN-learned \textit{safe} reference within which the lane-changing maneuver can be done. The second term $g_\mathrm{time}^\mathrm{NN}$ adds a temporal aspect, by enforcing the vehicle to ensure a minimum \emph{safe} time headway. \textcolor{black}{We adopt $g_\mathrm{time}^\mathrm{NN}$ because time-to-collision is an important measure in traffic safety assessment \cite{minderhoud2001extended}}. The intuition behind it is to ensure that the vehicles do not occupy the same position at the same time.

\begin{figure}[t]
\centering
\includegraphics[height=1.1in]{poc_cropped.pdf}
\caption{Collision point diagram. At every timestep, the heading vectors of the agents can be calculated by approximating the motion as linear. The intersection between these vectors is taken to be the collision point where the agents would collide if a yield action is not taken. } \label{fig:poc_viz}
\end{figure}

{\bf Results.} For each scenario, we trained a GRI model with the recurrent policy decoder (\ref{eqn:rnn1})-(\ref{eqn:rnn4}) in Appx. \ref{app:model}. The results are summarized in Table \ref{table:ngsim}. In car-following, the NRI model still performs better on trajectory reconstruction, but the GRI model achieves comparable RMSE on NGSIM dataset. In lane-changing, their comparison is consistent: The NRI model slightly outperforms our model in trajectory reconstruction; Our model dominates the NRI model in graph accuracy.

We visualize the interaction graphs in Fig. \ref{fig:ngsim_graph}. One interesting observation is that the graphs inferred by NRI have more edges in general. We want to emphasize that both models are trained under the same sparsity constraint. The results imply that we could guide the model to explore a clean and sparse representation of interactions by incorporating relevant domain knowledge, whereas the sparsity regularization itself is not sufficient to serve the purpose. Moreover, the NRI model assigns the same edge type to both edges between a pair of agents. It makes the graphs less interpretable because the vehicles ought to affect each other in different ways. On the other hand, even if different from the hypotheses, our GRI model tends to infer sparse graphs with directional edges.

\textcolor{black}{For the supervised policy, it has the lowest reconstruction error in lane-changing. It implies that the human hypothesis is reasonable because it is capable of modeling the interactions among human drivers. For the car-following case, its reconstruction error is slightly higher than NRI. Since we cannot assure that our hypothesis is the ground-truth interaction graph underlying the interacting system\textemdash in fact, as we mentioned before, we never meant to treat it as the ground-truth\textemdash it is possible that the NRI model can find a latent space that can effectively model the interactions in the unsupervised manner. However, as shown in Fig. \ref{fig:ngsim_graph}, it is difficult to interpret the graphs inferred by NRI. Considering the sparse and semantic nature of the hypothesis as well as the fact that the supervised policy's reconstruction error is on par with the NRI model, we think the chosen hypothesis is a valid one.}

\begin{table*}[t]
\centering
\caption{Performance Comparison on Naturalistic Traffic Dataset}
\label{table:ngsim}
\begin{threeparttable}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Car Following ($\Delta t=0.2s, T=30$)} & \multicolumn{4}{c|}{Lane Changing ($\Delta t=0.2s, T=40$)} \\ \cline{2-8}
& $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ & $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_y(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ \\ \hline GRI & $1.700\pm{1.005}$ & $0.721\pm{0.363}$ & $\mathbf{100.00\pm{0.00}}$ & $7.118\pm{3.647}$ & $0.764\pm{0.336}$ & ${4.320\pm2.392}$ & $\mathbf{98.55\pm{0.06}}$ \\ \hline NRI & $\mathbf{1.436\pm{0.880}}$ & $\mathbf{0.650\pm{0.328}}$ & $64.09\pm{0.08}$ & $6.532\pm{3.822}$ & $0.330\pm{0.181}$ & $\mathbf{4.291\pm2.544}$ & $28.98\pm{0.08}$ \\ \hline Supervised & ${1.482\pm{0.938}}$ & ${0.665\pm{0.344}}$ & - & $\mathbf{5.897\pm{3.651}}$ & $\mathbf{0.323\pm{0.223}}$ & $4.307\pm{2.435}$ & - \\ \hline
\end{tabular}
\begin{tablenotes}
\item[1] The data is presented in form of $\text{mean}\pm{\text{std}}$.
\end{tablenotes}
\end{threeparttable}
\end{table*}

\begin{figure}[t]
\centering
\includegraphics[width=3.1in]{ngsim_graph.pdf}
\caption{The empirical distribution of estimated edge variables $\hat{z}$ over the test dataset in the naturalistic traffic scenarios. We summarize the results in multiple adjacency matrices corresponding to different edge types. In the adjacency matrix corresponding to the $k^\mathrm{th}$ type of interaction, the element $A_{i,j}$ indicates the relative frequency of $\hat{z}_{j,i}=k$, where $\hat{z}_{j,i}$ is the latent variable for the edge from node $j$ to node $i$. } \label{fig:ngsim_graph}
\end{figure}

\subsection{\textcolor{black}{Semantic Meaning of Latent Space}} \label{sec:ood}
\textcolor{black}{The above experimental results show that our GRI model can recover the ground-truth interaction graphs in the synthetic scenarios with high accuracy, and infer interaction graphs that are consistent with human hypothesis on the NGSIM dataset. However, as we argue in Sec. \ref{sec:introduction}, accurate interaction inference alone is not sufficient to show that the model learns a semantically meaningful latent space that is consistent with human domain knowledge. Given an edge, the policy decoder should also synthesize the corresponding semantic interactive behavior indicated by its edge type. It is difficult to verify whether the policy decoder is able to synthesize semantically meaningful interaction simply by monitoring the reconstruction error. Small reconstruction error on in-distribution data could be achieved by imitating demonstration without modeling the correct interaction \cite{de2019causal, tang2021exploring}. To study the semantic meaning of latent space, we design a set of out-of-distribution tests \footnote{For clarification, the models used in this section are the same as those introduced in Sec. \ref{sec:synthetic}. We merely designed additional out-of-distribution cases for testing.} by adding increasing perturbation to the initial states. We then enforce the same edge types as in the in-distribution case, and run those different policy decoders to generate the trajectories. We are curious about whether the policy decoders can consistently synthesize the correct semantic interactive behavior under distribution shift. If so, we claim the latent space indeed possesses the semantic meaning that is consistent with human domain knowledge.}

\textcolor{black}{In the synthetic scenarios, we focus on the following relation. For both car-following and lane-changing scenes, we keep the two vehicles with the following relation, resulting in interaction graphs merely consisting of the following edges (Fig. \ref{fig:ood_scene}). We introduce perturbation by decreasing the initial longitudinal headway to values unseen during the training stage.} The initial longitudinal headway is defined as $\Delta x=x^0_1 - x^0_0$, namely the longitudinal distance from Vehicle 1 to Vehicle 0 at the first time step. During the training stage, we sampled $\Delta x$ from uniform distributions: In car-following, $\Delta x\sim \mathrm{unif}(4, 8)$; In lane-changing, $\Delta x\sim \mathrm{unif}(8, 12)$. In the out-of-distribution experiments, we gradually decreased $\Delta x$ from the lower bound to some negative value, which means Vehicle 0 is placed in front of Vehicle 1. We are curious about whether the models can generate trajectories meeting the characteristics of the car-following behavior in these unseen scenarios\textemdash scenarios with a different number of vehicles and distorted state distribution. To quantitatively evaluate whether the synthesized behavior satisfies the requirements of car-following, we consider three metrics for evaluation:

\begin{itemize}
{\color{black}
\item Success Rate:
\begin{align}
\mathrm{Success\ Rate} &= \frac{1}{N}\sum_{i=1}^N\mathbf{1}(\Delta x^f_{i} \geqslant \delta_f), \label{eqn:success_rate} \\
\textrm{where } \Delta x^f_{i} &= x^T_{1,i} - x^T_{0,i}, \nonumber
\end{align}

\item Collision Rate:
\begin{align}
\mathrm{Collision\ Rate} &= \frac{1}{N}\sum_{i=1}^N \mathbf{1}(d_{\min,i} \leqslant \delta_c), \label{eqn:minimum_distance} \\
\textrm{where } d_{\min,i} & = \min_t \sqrt{\left|x^t_{1,i} - x^t_{0,i}\right|^2 + \left|y^t_{1,i} - y^t_{0,i}\right|^2}, \nonumber
\end{align}
}

\item Lateral distance:
\begin{equation}
\Delta y = \left|y^T_1 - y^T_0\right| - \left|y^0_1 - y^0_0\right|. \label{eqn:lateral_distance}
\end{equation}

\end{itemize}

\begin{figure}[t]
\centering
\includegraphics[width=2.7in]{ood_scene.pdf}
\caption{Out-of-distribution scenarios. We removed one vehicle from the nominal scenes and shifted the initial longitudinal headway $\Delta x$ to unseen values.} \label{fig:ood_scene}
\end{figure}

We intend to quantify three typical characteristics of the following behavior with the metrics defined above: 1) staying behind the leading vehicle; 2) maintaining a substantial safe distance from the leading vehicle; 3) keeping in the same lane as the leading vehicle. \textcolor{black}{We consider the following vehicle's maneuver successful if the vehicle manages to keep a substantial positive final headway. And we consider two vehicles collide if the minimum distance between them is smaller than a safety threshold.} Lastly, we expect the following behavior to attain a negative $\Delta y$, which means the following vehicle attempts to approach the leading vehicle's lane.

All metrics were applied in the lane-changing scenario, but we only adopted $\mathrm{Success\ Rate}$ in the car-following scenario. Since we only model the longitudinal dynamics, $\Delta y$ is not applicable. For the same reason, if their initial positions are too close or the following vehicle is located ahead of the leading one initially, the following vehicle will inevitably crash into the leading vehicle, which results in $d_{\min}=0$. Therefore, we only care about the first characteristic.

The results are summarized in Fig. \ref{fig:ood_stats_cf} and Fig. \ref{fig:ood_stats_lc}, where we plot the mean values of the evaluated metrics versus $\Delta x$. \textcolor{black}{In the car-following scenario, the NRI policy fails to slow down Vehicle 0 to follow Vehicle 1 when $\Delta x$ becomes negative. In contrast, the supervised policy and GRI policy maintain high success rates with negative $\Delta x$. However, the number of failure cases starts to increase for the supervised policy when $\Delta x$ becomes substantially negative, whereas the GRI policy maintains a perfect success rate over the tested range of perturbation.} We visualize a marginal example in Fig. \ref{fig:ood}, where both the NRI policy and the supervised one fail to maintain a positive final headway.

\begin{figure}[t]
\centering
\includegraphics[width=2.8in]{ood_stats_cf.pdf}
\caption{\textcolor{black}{Results in out-of-distribution synthetic car-following scenario. We plot $\mathrm{Success Rate}$ versus $\Delta x$ with the error band denoting $95\%$ confidence interval of the indicator, $\mathbf{1}(\Delta x^f_i\geqslant \delta_f)$. We set $\delta_f=2\mathrm{m}$.}} \label{fig:ood_stats_cf}
\end{figure}

\textcolor{black}{In the lane-changing scenario, the GRI policy maintains a consistent perfect success rate over all tested values of $\Delta x$. For the other two models, the success rates drastically decrease with decreasing $\Delta x$.} In terms of $\Delta y$, all models tend to reduce the lateral distance between the vehicles, which is consistent with the third characteristic of the following behavior. However, we found that the GRI policy attains an average $\Delta y$ with smaller magnitude and the magnitude decreases with decreasing $\Delta x$. It implies that the GRI policy changes its strategy when the initial position of Vehicle 0 is ahead of Vehicle 1. In order to keep a proper safe distance, Vehicle 0 does not change its lane until Vehicle 1 surpasses itself. On the other hand, the lateral behavior is unchanged for the other two models. However, the vehicle cannot maintain a substantial safe distance if it changes its lane too early, \textcolor{black}{which is verified by the plot of collision rate versus $\Delta x$}. The difference in their strategies is further illustrated by the example visualized in Fig. \ref{fig:ood}.

\begin{figure}[t]
\centering
\includegraphics[width=3.0in]{ood_stats_lc.pdf}
\caption{\textcolor{black}{Results in out-of-distribution synthetic lane-changing scenario. We plot $\mathrm{Success Rate}$, $\mathrm{Collision Rate}$, and the mean value of $\Delta y$ versus $\Delta x$. The error bands denote $95\%$ confidence interval. For $\mathrm{Success Rate}$ and $\mathrm{Collision Rate}$, the error bands are of the indicator functions. We set $\delta_f=\delta_c=2\mathrm{m}$.}} \label{fig:ood_stats_lc}
\end{figure}

\begin{figure}[t]
\centering
\includegraphics[width=2.9in]{ood_stats_cf_ngsim.pdf}
\caption{\textcolor{black}{Results in out-of-distribution naturalistic traffic car-following scenario. We plot $\mathrm{Success Rate}$ versus $\Delta x$ with the error bands denoting $95\%$ confidence interval of the indicator, $\mathbf{1}(\Delta x^f_i\geqslant \delta_f)$. We set $\delta_f=2\mathrm{m}$.}} \label{fig:ood_stats_cf_ngsim}
\end{figure}

\begin{figure}[t]
\centering
\includegraphics[width=3.2in]{ood_stats_ngsim_lc.pdf}
\caption{\textcolor{black}{Results in out-of-distribution naturalistic traffic lane-changing scenario. We plot $\mathrm{Success Rate}$, $\mathrm{Collision Rate}$, and the mean value of $\Delta y$ versus $\Delta x$. The error bands denote $95\%$ confidence interval. For $\mathrm{Success Rate}$ and $\mathrm{Collision Rate}$, the error bands are of the indicator functions. We set $\delta_f=\delta_c=2\mathrm{m}$.}} \label{fig:ood_stats_ngsim_lc}
\end{figure}

\begin{figure*}[t]
\centering
\includegraphics[width=6.9in]{ood.pdf}
\caption{\textcolor{black}{Examples where the leading car is placed behind the following one at the initial timestep. The trajectories are visualized as sequences of rectangles. Each rectangle represents a vehicle at a specific time step. The vehicles are driving along the positive direction of the x-axis. The GRI policy still prompts the car-following behavior: It slows down the vehicle until the leading one surpasses it. Meanwhile, the NRI policy and the supervised one do not behave as $\mathcal{G}_\mathrm{interact}$ suggests.}} \label{fig:ood}
\end{figure*}

We repeat the experiment on the NGSIM datasets. Similar to the case of the synthetic dataset, we remove one vehicle from each scene, resulting in an interaction graph consisting of a single edge (Fig. \ref{fig:ood_scene}). \textcolor{black}{It is worth noting that removing a vehicle from a scene alters the dynamics of the interacting system. It is not fair to expect the models to synthesize the same trajectories in the dataset. Therefore, we do not aim to compare the generated trajectories with the ones in the dataset in this out-of-distribution test. We just check whether the generated trajectories satisfy the desired characteristics of the corresponding interactive behaviors.}

In the lane-changing case, the remaining edge has the type of yielding. According to our definition of the yielding relation, we consider the same characteristics and adopt the same metrics defined in Eqn. (\ref{eqn:success_rate})-(\ref{eqn:lateral_distance}) for evaluation. Since we do not have control over the data generation procedure, we generate out-of-distribution test samples with different levels of discrepancy by controlling the ratio of longitudinal headway change. Given a sample from the original test dataset, we generate its corresponding out-of-distribution sample by shifting its initial longitudinal headway $\Delta x$ by a certain ratio, denoted by $\delta$, resulting in a new longitudinal headway $\Delta x'$:
\begin{equation*}
\Delta x' = (1 - \delta)\Delta x.
\end{equation*}
We evaluate the models on datasets generated with different values of $\delta$. We are particularly interested in the cases when $\delta\geqslant1$, which leads to a negative initial headway. We present the results in Fig. \ref{fig:ood_stats_cf_ngsim} and Fig. \ref{fig:ood_stats_ngsim_lc}. The comparison is quite consistent with the synthetic scenarios. Compared to the other baselines, our GRI policy can synthesize trajectories that satisfy the desired semantic properties in a larger range of distribution shift.

The results suggest that even though the NRI model can accurately reconstruct the trajectories, the unsupervised latent space and the corresponding policies do not capture the semantic meanings behind the interactions. In contrast, the GRI model learns a semantically meaningful latent space which is consistent with human domain knowledge. Another useful insight we draw from the experiment is that interaction labels are not sufficient to induce an explainable model with semantic latent space. Even though the supervised policy utilizes additional information on the ground-truth interaction graph, it fails to synthesize the following behavior in novel scenarios. Although the GRI model still has a considerable gap in reconstruction performance compared to the supervised baseline, it provides a promising and principled manner to incorporate domain knowledge into a learning-based autonomous driving system and induce an explainable model.

{\color{black}
\section{Discussion and Limitation}
\label{sec:discussion}
\subsection{Application of the Semantic Latent Space}
Enabling an explainable model is a crucial step towards trustworthy human interaction. However, it is still unclear how humans may benefit from the improved explainability. We would like to have a brief discussion on the potential application of the semantic latent space introduced in GRI. When the autonomous vehicle encounters an unfamiliar situation (e.g., the out-of-distribution scenarios studied in Sec. \ref{sec:ood}), a semantic latent space gives the safety drivers or passengers the privilege to review and override the inferred interaction graph if the model misunderstands the scenario. In contrast, humans can neither understand an interaction graph nor identify the correct edge types, if the learned interactive behaviors do not have explicit semantic meaning. Such kind of safety assurance could help build up a safe and trustworthy cooperation between humans and the autonomous vehicles.

However, it is impractical to keep the users monitoring the model output in real-time. Instead, we can introduce an additional module to detect out-of-distribution scenes \cite{filos2020can, SunL-RSS-21} and use the estimated epistemic uncertainty to decide when to query the end users. In \cite{filos2020can}, the authors proposed an adaptive variant of their robust imitative planning algorithm, which incorporates such a unit. It is also a common practice for current autonomous driving companies to have human assistants for vehicles to query when encountering abnormal situations.

\subsection{Limitation of the Learning Algorithm}
In our experiments, GRI always has higher reconstruction error than NRI, especially on the synthetic dataset. One of the reasons is that reconstruction error is not directly optimized under the AIRL formulation. The objective function of NRI consists of a reconstruction loss, which essentially minimizes the Euclidean distance between the reconstructed trajectory and the ground-truth one. In other words, it directly minimizes the RMSE metrics used in our evaluation. In contrast, GRI adopts the objective function of AIRL, which also minimizes a distance between the trajectory pair. However, the distance is defined by the learned discriminator and is not necessarily equivalent to the Euclidean distance. In Appx. \ref{app:airl_ablation}, we study two AIRL baseline models on the synthetic dataset. The results suggest that none of these AIRL-based approaches achieve the same reconstruction performance as NRI.

Another reason is that the current learning algorithm is not quite stable, because of the adversarial training scheme we introduce when incorporating AIRL into the original NRI model. In typical AIRL settings, we may mitigate this problem by warmstarting the training with a policy network pretrained through imitation learning or behavior cloning \cite{finn2016guided, yu2019meta}. However, since we aim to learn a semantic latent space, warmstarting the training with a model with unsupervised latent space is not helpful. Alternatively, we may initialize the policy decoder with the supervised one. One issue is that it will change our current setting where human labels are not required. We will investigate this new setting in our future work, and develop a more stable training scheme to further optimize the performance of GRI. A stable training scheme is also a prerequisite before applying GRI to more sophisticated real-world scenarios.

The structured reward functions also interfere with the stability of the learning procedure. Compared to the variant of GRI studied in Appx. \ref{app:airl_ablation} with semantic reward functions removed, we found GRI is more sensitive to hyperparameters and prone to diverging if not carefully tuned. This is because, although the structured reward functions are differentiable, it is not guaranteed that the reward functions can be stably optimized through gradient descent. In our future work, we will explore a more stable and robust learning scheme with those structured reward functions.
}

\section{Conclusion and Future Work}
\label{sec:conclusion}
In this work, we propose Grounded Relational Inference (GRI), which models an interactive system's underlying dynamics by inferring the agents' semantic relations. By incorporating structured reward functions, we ground the relational latent space into semantically meaningful behaviors defined with expert domain knowledge. We demonstrate that GRI can model interactive traffic scenarios under both simulation and real-world settings, and generate semantic interaction graphs explaining the vehicle's behavior by their interactions.

Although we limit our experiments to the autonomous driving domain, the model itself is formulated without specifying the context. As long as proper domain knowledge is available, the proposed method can be extended naturally to other fields (e.g., human-robot interaction). However, there are several technical gaps we need to bridge before extending the current framework to more complicated traffic scenarios and interactive systems in other fields. One gap between the current model and these practical modules is graph dynamics. Throughout the paper, we assume a static interaction graph over the time horizon. We will investigate how to incorporate dynamic graph modeling into the current framework. Another gap is the cooperative assumption, which we would like to remove in the future so that the framework can be generalized to non-cooperative scenarios. Besides, as we have mentioned before, the GRI model still has a considerable gap in reconstruction performance compared to the other baselines. In future work, we will improve the model architecture and training algorithm to fill the performance gap while maintaining the advantages of GRI as an explainable model.

\section{Appendix}
\subsection{Graph Neural Network Model Details} \label{app:model}
In terms of model structure, both the encoder and the policy decoder are built based on node-to-node message-passing \cite{gilmer2017neural}, consisting of a node-to-edge message-passing and an edge-to-node message-passing:
\begin{align}
v\rightarrow e:\ \ \mathbf{h}^l_{i,j} & = f^l_e(\mathbf{h}^l_i, \mathbf{h}^l_j, \mathbf{x}_{i,j}), \label{eqn:gnn-1}\\
e\rightarrow v:\ \mathbf{h}^{l+1}_{j} & = f^l_v(\sum\nolimits_{i\in\mathcal{N}_j}\mathbf{h}^l_{i,j}, \mathbf{x}_j), \label{eqn:gnn-2}
\end{align}
where $\mathbf{h}^l_i$ is the embedded hidden state of node $v_i$ in the $l^{\rm th}$ layer and $\mathbf{h}^l_{i,j}$ is the embedded hidden state of the edge $e_{i,j}$. The features $\mathbf{x}_i$ and $\mathbf{x}_{i,j}$ are assigned to the node $v_i$ and the edge $e_{i,j}$ respectively as inputs. $\mathcal{N}_j$ denotes the set of the indices of $v_j$'s neighbouring nodes connected by an incoming edge. The functions $f^l_e$ and $f^l_v$ are neural networks for edges and nodes respectively, shared across the graph within the $l^\mathrm{th}$ layer of node-to-node message-passing.

{\bf GNN Encoder.} The GNN encoder is essentially the same as in NRI. It models the posterior distribution as $q_\phi(\mathbf{z}\vert\boldsymbol{\tau})$ with the following operations:
\begin{align*}
\mathbf{h}^1_j & = f_{\mathrm{emb}}(\mathbf{x}_j), \\
v\rightarrow e:\ \: \mathbf{h}^1_{i,j} & = f^1_e(\mathbf{h}^1_i, \mathbf{h}^1_j), \\
e\rightarrow v:\ \ \ \mathbf{h}^{2}_{j} & = f^1_v\left(\sum\nolimits_{i\neq j}\mathbf{h}^1_{i,j}\right), \\
v\rightarrow e:\ \: \mathbf{h}^2_{i,j} & = f^2_e(\mathbf{h}^2_i, \mathbf{h}^2_j), \\
q_\phi(\mathbf{z}_{i,j}\vert{\boldsymbol{\tau}}) & = \mathrm{softmax}\left(\mathbf{h}^2_{i,j}\right),
\end{align*}
where $f_e^1, f_v^1$ and $f_e^2$ are fully-connected networks (MLP) and $f_{\mathrm{emb}}$ is a 1D convolutional network (CNN) with attentive pooling.

{\bf GNN Policy Decoder.} The policy operates over $\mathcal{G}_\mathrm{interact}$ and models the distribution $\boldsymbol{\pi}_\eta \left(\mathbf{a}^t\vert{\mathbf{x}^t, \mathbf{z}}\right)$, which can be factorized with $\pi_\eta\left(\mathbf{a}^t_j\vert{\mathbf{x}^t, \mathbf{z}}\right)$ as in Eqn. (\ref{eqn:facto}). We model $\pi_\eta$ as a Gaussian distribution with the mean value parameterized by the following GNN:
\begin{align}
v\rightarrow e:\ \ \ \ \Tilde{\mathbf{h}}^t_{i,j} & = \sum_{k=0}^{K}\mathbf{1}(z_{i,j}=k) \Tilde{f}^{k}_{e}(\mathbf{x}^t_i, \mathbf{x}^t_j), \label{eqn:policy1}\\
e\rightarrow v:\ \ \ \ \ \ \boldsymbol{\mu}_j^t & = \Tilde{f}_v\left(\sum\nolimits_{i\neq j}{\Tilde{\mathbf{h}}^t_{i,j}}\right), \label{eqn:policy2} \\
\pi_\eta\left(\mathbf{a}^t_j\vert{\mathbf{x}^t, \mathbf{z}}\right) & = \mathcal{N}(\boldsymbol{\mu}^t_j, \sigma^2\mathbf{I}) \label{eqn:policy3}.
\end{align}

Alternatively, the model capacity is improved by using a recurrent policy $\pi_\eta\left(\mathbf{a}^t_j\vert \mathbf{x}^t, \dots, \mathbf{x}^1, \mathbf{z}\right)$; namely, the agents take actions according to the historical trajectories of the system. We follow the practice in \cite{kipf2018neural} and add a GRU unit to obtain the following recurrent model:
\begin{align}
v\rightarrow e:\ \ \ \ \Tilde{\mathbf{h}}^t_{i,j} & = \sum_{k=0}^{K}\mathbf{1}(z_{i,j}=k) \Tilde{f}^{k}_{e}\left(\Tilde{\mathbf{h}}^{t}_i,\Tilde{\mathbf{h}}^{t}_j\right), \label{eqn:rnn1}\\
e\rightarrow v:\ \ \ \Tilde{\mathbf{h}}^{t+1}_j & = \mathrm{GRU}\left(\sum\nolimits_{i\neq j} \Tilde{\mathbf{h}}^t_{i,j}, \mathbf{x}^t_j, \Tilde{\mathbf{h}}^t_{j} \right), \\
\boldsymbol{\mu}_j^t & = f_\mathrm{out}\left(\Tilde{\mathbf{h}}^{t+1}_j\right), \\
\pi_\eta\left(\mathbf{a}^t_j\vert \mathbf{x}^t, \dots, \mathbf{x}^1, \mathbf{z}\right) & = \mathcal{N}(\boldsymbol{\mu}^t_j, \sigma^2\mathbf{I}), \label{eqn:rnn4}
\end{align}
where $\Tilde{\mathbf{h}}^t_i$ is the recurrent hidden state encoding the historical information up to the time step $t-1$.

{\color{black}
\subsection{Reconstruction Visualization on Synthetic Dataset}\label{app:visual}

\begin{figure*}[t]
\centering
\includegraphics[width=7.1in]{std_synthetic.png}
\caption{\textcolor{black}{Average standard deviation of states along the time horizon. (a) and (b) show the standard deviation of $x$ and $v$ in the synthetic car-following scenario. (c)-(e) show the standard deviation of $x$, $y$, and $v$ in the synthetic lane-changing scenario.}}
\label{fig:std_synthetic}
\end{figure*}

\begin{figure}[t]
\centering
\includegraphics[width=3in]{visual_lc.png}
\caption{\textcolor{black}{Visualization of the reconstructed trajectories in a lane-changing scene. (a) and (b) correspond to the trajectories of Car 1 and Car 0 respectively. We visualize the distributions of the reconstructed trajectories estimated using kernel density estimate. The ground-truth trajectories are denoted by the blue curves.}}
\label{fig:visual_lc}
\end{figure}

In our experiments, we found that GRI has significantly larger reconstruction error on the synthetic dataset than the NRI baseline. To better understand this performance gap on reconstruction, we looked into the reconstructed trajectories of both models. Instead of executing the mean value of the policy output as we did in our main experiments, we sampled the actions from the policy distribution to estimate the variance of reconstructed trajectories. In Fig. \ref{fig:std_synthetic}, we plot the average standard deviation of reconstructed states along the time horizon. We observed that the policy decoder of GRI tends to have larger variance. It partially explains the large RMSE values reported in Table \ref{table:synthetic}: the metrics were computed with a single reconstructed trajectory. The policy distribution of GRI still has larger bias than the one of NRI. We visualize the reconstructed trajectories of a lane-changing case in Fig. \ref{fig:visual_lc}. While the GRI policy induces larger variance, the distribution of the reconstructed trajectories is sensible.

\subsection{AIRL Ablation Study} \label{app:airl_ablation}
With the motivation of incorporating semantic meaning into the relational latent space, we developed GRI by introducing AIRL into relational inference and studied how the semantic reward functions may guide relational latent space learning. Meanwhile, it would be interesting to take a different perspective and study the effects of introducing relational inference and semantic reward functions into AIRL. In this section, we take the synthetic scenarios as examples and conduct an ablation study, where we compare GRI against two variants.

The first one is an AIRL variant, denoted by GRI-AIRL, which is obtained by removing relational inference and semantic reward functions from GRI. Concretely, both the policy and reward decoders operate on a fully-connected interaction graph with homogeneous edge type. And we simply use MLPs to model the reward functions in Eqn. (\ref{eqn:node_reward}) and (\ref{eqn:edge_reward}), instead of those semantic reward functions. The objective function then becomes Eqn. (\ref{eqn:opt-2}), but with neither the expectation over $\mathbf{z}$ nor the information bottleneck constraint. The second one is a variational AIRL variant, denoted by GRI-VAIRL, in which we introduce relational inference but do not use the semantic reward functions. In this case, the objective function is identical to the one in GRI, i.e., Eqn. (\ref{eqn:opt-2}).

\begin{figure}[t]
\centering
\includegraphics[width=2.5in]{graph_vairl.pdf}
\caption{\textcolor{black}{The interaction graph inferred by the GRI-VAIRL model in the synthetic lane-changing scenario.}}
\label{fig:graph_vairl}
\end{figure}

The results are summarized in Table \ref{table:ablation}. For the car-following scenario, the reconstruction performance is improved after introducing relational inference into AIRL. It is interesting that the GRI-VAIRL variant is able to recover the ground-truth interaction graph, even without the semantic reward functions. It makes sense because the car-following scenario only consists of a single non-trivial edge type. It is plausible for the model to distinguish non-interaction edges from the others, because null reward is enforced for non-interaction edges. In some senses, we may still consider the reward function semantic\textemdash it incorporates the semantic meaning of non-interaction into the latent space. However, we cannot guarantee that GRI-VAIRL can distinguish between different non-trivial interactive behaviors, which is verified by the lane-changing case. Fig. \ref{fig:graph_vairl} shows the inferred interaction graph. The model only adopts a single non-trivial edge type to describe all the interactive behaviors. Compared to the ground-truth graph, the inferred graph has an additional edge $z_{2,1}$ but ignores the edge $z_{1,0}$. Ignoring the edge $z_{1,0}$ limits the modeling capacity of the policy decoder, which could possibly explain why GRI-VAIRL has larger $\mathrm{RMSE}_x$ and $\mathrm{RMSE}_v$ than GRI-AIRL in the lane-changing case.

In summary, we could improve reconstruction performance by introducing relational inference into AIRL. Even if GRI-VAIRL has larger reconstruction error in the lane-changing case due to the biased inferred graph, we still observe that GRI-VAIRL converges faster. The learning process becomes more stable and less sensitive to different hyperparameters. We think it is because the model may identify those agents that are not interacting with each other, preventing the reward decoder from fitting a reward function unifying both interactive and non-interactive behaviors. Meanwhile, it is still necessary to incorporate semantic reward functions to differentiate different interactive behaviors and induce a semantically meaningful interaction graph. However, semantic latent space comes at a cost of reconstruction performance. The structured reward functions limit the modeling capacity of the reward decoder. Also, although the structured reward functions are differentiable, it is not guaranteed that they can be well optimized through gradient descent. As a result, they may interfere with the stability of the learning procedure.

\begin{table*}[t]
\centering
\caption{\textcolor{black}{Ablation Study on Synthetic Dataset}}
\label{table:ablation}
\begin{threeparttable}
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|}
\hline
\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Car Following ($\Delta t=0.2s$, $T=20$)} & \multicolumn{4}{c|}{Lane Changing ($\Delta t=0.2s$, $T=30$)} \\ \cline{2-8}
& $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ & $\mathrm{RMSE}_x(\mathrm{m})$ & $\mathrm{RMSE}_y(\mathrm{m})$ & $\mathrm{RMSE}_v(\mathrm{m/s})$ & $\mathrm{Accuracy}(\%)$ \\ \hline GRI & $0.241\pm{0.125}$ & $0.174\pm{0.068}$ & $\mathbf{100.00\pm{0.00}}$ & $0.529\pm{0.230}$ & $0.207\pm{0.046}$ & $0.303\pm{0.128}$ & $\mathbf{99.95\pm{0.01}}$ \\ \hline GRI-VAIRL & $\mathbf{{0.120\pm{0.054}}}$ & $\mathbf{{0.116\pm{0.039}}}$ & $\mathbf{100.00\pm{0.00}}$ & ${0.377\pm{0.201}}$ & $\mathbf{{0.160\pm{0.038}}}$ & ${0.190\pm{0.058}}$ & $50.0\pm{0.00}$ \\ \hline GRI-AIRL & $0.138\pm{0.068}$ & $0.150\pm{0.043}$ & - & $\mathbf{0.304\pm{0.321}}$ & $0.198\pm{0.065}$ & $\mathbf{0.173\pm{0.101}}$ & -\\ \hline
\end{tabular}
\begin{tablenotes}
\item[1] The data is presented in form of $\text{mean}\pm{\text{std}}$.
\end{tablenotes}
\end{threeparttable}
\end{table*}
}
\title{ Real Paley-Wiener theorems \\ for the Dunkl transform on $I\!\!R^d$}

\section { Introduction}
\hspace*{5mm} In the last few years there has been a great interest in real Paley-Wiener theorems for certain integral transforms; see \cite{Ta} for an overview, references and details on this question.\\ \hspace*{5mm} In this paper we consider the Dunkl operators $T_j, j=1,...,d$, which are the differential-difference operators introduced by C.F.Dunkl in
\cite{D1}. These operators are very important in pure Mathematics and in Physics. They provide a useful tool in the study of special functions with root systems (see \cite{D2}.)
\\ \hspace*{5mm}C.F.Dunkl in \cite{D3} (see also \cite{J}) has studied a Fourier transform ${\cal F}_{D}$, called Dunkl transform defined for a regular function $f$ by $$\forall \, x \in I\!\!R^d, \; {\cal F}_D f(x) = \displaystyle\int_{I\!\!R^d}K(-ix,y) f(y)\omega_k(y)dy, $$ where $K(-ix,y)$
represents the Dunkl kernel and $\omega_k $ a weight function.\\
\hspace*{5mm} The main purpose of this paper is to prove real Paley-Wiener theorems on the Schwartz space ${\cal S}(I\!\!R^d)$ and on $L^2_k(I\!\!R^d)$. More precisely we consider first the Paley-Wiener spaces associated with the Dunkl operators:
$$\begin{array}{ccc}
PW_{k}^2(I\!\!R^d) & = & \{f \in {\cal E}(I\!\!R^d)/\forall\, n\, \in\, I\!\!N,\;
\, \triangle_k^n f \in L^2_k(I\!\!R^d) \,
\mbox{and} \, R_f^{\triangle_k} = \displaystyle\lim_{n\to \infty}
||\triangle_k^n f||_{k,2}^{\frac{1}{2n}} < +\infty\} \\
PW_{k}(I\!\!R^d) & = & \{f \in {\cal E}(I\!\!R^d)/\forall\, n,m\, \in\, I\!\!N, \,
\; (1+||x||)^m \triangle_k^n f \in L^2_k(I\!\!R^d) \,
\; \mbox{and} \, R_f^{\triangle_k} < +\infty\},
\end{array}$$
where $ {\cal E}(I\!\!R^d) $ is the space of $C^\infty$-functions on
$I\!\!R^d$, $\triangle_k = \displaystyle\sum_{j=1}^d T_j^2$ the Dunkl-Laplacian operator, $L^2_k(I\!\!R^d)$ the space of square integrable functions with respect to the measure $\omega_k(x) dx$
and $||.||_{k,2}$ the norm of the space $L^2_k(I\!\!R^d)$.\\ We establish that ${\cal F}_D$ is a bijection from $PW_{k}^2(I\!\!R^d)$
onto $L^2_{k,c}(I\!\!R^d)$(the space of functions in $L^2_k(I\!\!R^d)$
with compact support), and from $PW_{k}(I\!\!R^d)$ onto $D(I\!\!R^d)$(the space of $C^\infty$-functions on $I\!\!R^d$ with compact support).\\
\hspace*{5mm}Next, we characterize the $L^2_k(U)$-functions by their Dunkl transform, where $U$ is respectively a disc, a symmetric body, a nonconvex and an unbounded domain in $I\!\!R^d$. These results are the real Paley-Wiener theorems for square integrable functions with respect to the measure $\omega_k(x)dx$.\\
\hspace*{5mm}We generalize also a theorem of H.H.Bang \cite{B} by characterizing the support of the Dunkl transform of functions in
$ {\cal S}(I\!\!R^d) $ by an
$L^p$ growth condition. More precisely these real Paley-Wiener theorems can be stated as follows:\\ $\bullet $ The Dunkl transform ${\cal F}_D(f)$ of $f \in {\cal S}(I\!\!R^d)$ vanishes outside a polynomial domain $U_P = \{x \in I\!\!R^d, \; P(x) \leq 1\}$,
with $P$ a non constant polynomial, if and only if $$\limsup_{n
\to +\infty}||P^n (iT)f||_{k,p} \leq 1, \; 1 \leq p \leq \infty,
$$ with $T = (T_1,...,T_d)$ and $||.||_{k,p}$ is the norm of the space $L^p_k(I\!\!R^d)$ of $p^{th}$ integrable functions on $I\!\!R^d$
with respect to the measure $\omega_k(x)dx$.
\\ $\bullet$ A function $f \in {\cal S}(I\!\!R^d)$ is the Dunkl transform of a function vanishing in some ball with radius $r$
centered at the origin, if and only if $$ \lim_{n \to \infty}
||\displaystyle\sum_{m = 0}^{\infty} \frac{(n\triangle_k)^m\, f
}{m!}||_{k,p}^{\frac{1}{n}}\leq \exp(-r^2), \; 1 \leq p \leq
\infty. $$ \hspace*{5mm} This paper is arranged as follows:\\
\hspace*{5mm}In the second section we recall the main results about the harmonic analysis associated with the Dunkl operators.\\
\hspace*{5mm} The third section is devoted to the study of the functions whose Dunkl transform has compact support, and to establishing the real Paley-Wiener theorems for ${\cal F}_D$ on the Schwartz space ${\cal S}(I\!\!R^d)$.\\
\hspace*{5mm} In the fourth section we characterize the functions in ${\cal S}(I\!\!R^d)$ such that their Dunkl transform vanishes outside a polynomial domain.\\ \hspace*{5mm} In the fifth section we give a necessary and sufficient condition for functions in
$L^2_k(I\!\!R^d)$ such that their Dunkl transform vanishes in a disc.
\\ \hspace*{5mm} We study in the sixth section the functions such that their Dunkl transform satisfies the symmetric body property, and we derive a real Paley-Wiener type theorem for these functions.\\

\section{Harmonic analysis associated with the Dunkl operators.}
\hspace*{5mm} In the first two subsections we collect some notations and results on Dunkl operators, the Dunkl kernel and the Dunkl intertwining operators (see [6],[7],[8]).
\subsection { Reflection groups, root system and multiplicity functions} \hspace*{5mm}We consider $I\!\!R^d$ with the euclidean scalar product $<.,.>$ and $||x||=\sqrt{\langle x,x\rangle}$. On
${I\!\!\!\!C}^{d},\;||.||$ denotes also the standard Hermitian norm\ while\ for\ all\ $z=(z_{1},\;...,\;z_{d}%
),\;w=(w_{1},\;...,\;w_{d})\in{I\!\!\!\!C}^{d},$%
\[
<z,w>=\displaystyle\sum_{j=1}^{d}z_{j}\overline{w}_{j}.
\]
For $\alpha\in I\!\!R^d\backslash\{0\}$, let $\sigma_{\alpha}$ be the reflection in the hyperplane $H_{\alpha}\subset I\!\!R^d$ orthogonal to
$\alpha$, i.e.
\begin{equation}
\sigma_{\alpha}(x)=x-2\frac{\langle\alpha,x\rangle}{||\alpha||^{2}}\alpha.
\label{2.1}%
\end{equation}
A finite set $R\subset I\!\!R^d\backslash\{0\}$ is called a root system if $R\cap I\!\!R.\alpha=\{\alpha,-\alpha\}$ and $\sigma_{\alpha}R=R$
for all $\alpha\in R$. For a given root system $R$ the reflections
$\sigma _{\alpha},\alpha\in R$, generate a finite group $W\subset O(d)$, the reflection group associated with $R$. We denote by $|W|$
its cardinality. All reflections in $W$ correspond to suitable pairs of roots. For a given $\beta \in I\!\!R^d \backslash \bigcup_{\alpha\in R}H_{\alpha}$, we fix the positive subsystem
$R_{+}=\{\alpha\in R\;/\langle\alpha,\beta\rangle>0\}$,
then for each $\alpha\in R,$ either $\alpha\in R_{+}$ or $-\alpha\in R_{+}%
$.\newline A function $k:R\longrightarrow{I\!\!\!\!C}$ on a root system
$R$ is called a multiplicity function if it is invariant under the action of the associated reflection group $W$. If one regards $k$
as a function on the corresponding reflections, this means that k is constant on the conjugacy classes of reflections in $W$. For abbreviation, we introduce the index
\begin{equation}
\gamma=\gamma(k)=\displaystyle\sum_{\alpha\in R_{+}}k(\alpha). \label{2.2}%
\end{equation}
Moreover, $\omega_{k}$ denotes the weight function
\begin{equation}
\omega_{k}(x)=\prod_{\alpha\in R_{+}}|\langle\alpha,x\rangle|^{2k(\alpha)},
\label{2.3}%
\end{equation}
which is$\;W-$invariant and homogeneous of degree
$2\gamma$.\newline We introduce the Mehta-type constant
\begin{equation}
c_{k}=(\int_{I\!\!R^{d}}\exp(-||x||^{2})\omega_{k}(x)\;dx)^{-1}, \label{2.4}%
\end{equation}

\noindent{\bf{Remark }}\\ \hspace*{5mm} For $d=1$ and
$W=\mathbf{Z}_{2}$, the multiplicity function $k$ is a single parameter denoted $\gamma>0$ and we have $$
\forall\,x\in I\!\!R,\;\omega_{k}(x)=|x|^{2\gamma}. $$

\subsection{ Dunkl operators- The Dunkl kernel and the Dunkl intertwining operator}

\noindent{\bf{Notations}}. We denote by \\
- $C(I\!\!R^{d}) (resp \;C_{c}
(I\!\!R^{d}))$\, the space of continuous functions on $I\!\!R^{d}$ (resp.
with compact support).\\
- $C^{p}(I\!\!R^{d}) (resp \;C^{p}_{c}
(I\!\!R^{d}))$\, the space of functions of class $C^p$ on $I\!\!R^{d}$
(resp. with compact support).\\
- $ {\cal E}(I\!\!R^{d})$ the space of
$C^{\infty}$-functions on $I\!\!R^{d}$.\\ - $ {C}^\infty_0(I\!\!R^{d})$
the space of $C^{\infty}$-functions on $I\!\!R^{d}$ which vanish at the infinity.
\\ - ${\cal S}(I\!\!R^{d})$ the space of $C^{\infty}$-functions on $I\!\!R^{d}$ which are rapidly decreasing together with all their derivatives.\\ - $D(I\!\!R^{d})$ the space of
$C^{\infty}$-functions on $I\!\!R^{d}$ which are of compact support.\\
We provide these spaces with the classical topology .\\\\
We consider also the following spaces\\ - ${\cal E'}(I\!\!R^{d})$ the space of distributions on $I\!\!R^{d}$ with compact support. It is the topological dual of ${\cal E}(I\!\!R^{d})$.\\ - ${\cal S'}(I\!\!R^{d})$
the space of tempered distributions on $I\!\!R^{d}$. It is the topological dual of ${\cal S}(I\!\!R^{d})$.\\

The Dunkl operators $T_{j},\; j\; = 1\;, ...,\; d $, on $I\!\!R^{d}$
associated with the finite reflection group W and the multiplicity function k are given by
\begin{equation}
T_{j} f(x) = \frac{\partial}{\partial x_{j}} f(x) +
\displaystyle\sum_{\alpha \in R_{+}}k(\alpha) \alpha_{j}
\frac{f(x) - f(\sigma_{\alpha}(x))}{<\alpha,x>},\quad f \; \in \;
C^{1}(I\!\!R^{d}). \label{h9}
\end{equation}
In the case $k = 0$, the $T_{j}, \, j = 1, ... , d,$ reduce to the corresponding partial derivatives. In this paper, we will assume throughout that $k \geq 0$ and $\gamma > 0$.\\ \hspace*{5mm} The Dunkl Laplacian $\triangle_{k}$ on $I\!\!R^{d}$ is defined by
\begin{equation}
\triangle_{k}f = \displaystyle\sum_{j = 1}^{d}T_{j}^{2}f =
\triangle f + 2 \displaystyle\sum_{\alpha \in R_{+}} k_{\alpha}
\delta_{\alpha} (f), \quad f \in C^{2}(I\!\!R^{d}), \label{h12}
\end{equation}
where $\triangle = \displaystyle\sum_{j = 1}^{d} \partial_{j}^{2}$
the Laplacian on $I\!\!R^{d}$ and
$$ \delta_{\alpha}(f)(x) = \frac{<\nabla f(x),\alpha>}{<\alpha,x>} -
\frac{ f(x) - f(\sigma_{\alpha}(x))}{<\alpha,x>^{2}},$$ with
$\nabla f$ the gradient of $f$.\\ \hspace*{5mm} For $f $ in $
C_{c}^{1}(I\!\!R^{d})$ and $g$ in $C^{1}(I\!\!R^{d})$ we have
\begin{equation}
\int_{I\!\!R^{d}} T_{j}f(x) g(x)\omega_{k}(x)\;dx = - \int_{I\!\!R^{d}}
f(x) T_{j}g(x)\omega_{k}(x)\;dx, \, j = 1, ..., d.
\label{hh6}
\end{equation}

For $y \in I\!\!R^{d} $, the system $$ \left\{
\begin{array}{crll}
T_{j}u(x,y) &=& y_{j} u(x,y),& j = 1, ..., d,\\\\ u(0,y) &=& 1,
&for\, all \; y \in \,I\!\!R^{d}.
\end{array}
\right. $$ admits a unique analytic solution on $I\!\!R^{d}$, denoted by $K(x,y)$ and called Dunkl kernel. This kernel has a unique holomorphic extension to ${I\!\!\!\!C}^{d} \times {I\!\!\!\!C}^{d}$.\\
\noindent{\bf{Example. }}\\ \hspace*{5mm} If $d = 1$ and $W =
\mathbf{Z}_{2}$, the Dunkl kernel is given by
\begin{equation}
K(z,w) = j_{\gamma - \frac{1}{2}}(izw) + \frac{zw}{2 \gamma + 1}
j_{\gamma + \frac{1}{2}}(izw), \quad z, \; w \in I\!\!\!\!C,
\label{h18}
\end{equation}
where for $\alpha \geq \frac{-1}{2}$, $j_{\alpha}$ is the normalized Bessel function of index $\alpha$
defined by
\begin{equation}
j_{\alpha}(z) = 2^{\alpha} \Gamma(\alpha + 1)
\frac{J_{\alpha}(z)}{z^{\alpha}} = \Gamma(\alpha + 1)
\displaystyle\sum_{n = 0}^{\infty}\frac{(-1)^{n}(\frac{z}{2})^{2 n} } {n! \Gamma(\alpha + 1 + n)}
\end{equation}
where $J_{\alpha}$ is the Bessel function of the first kind and index
$\alpha$.\\\\
\hspace*{5mm} The Dunkl kernel possesses the following properties

\begin{Prop}\hspace*{-2mm}.i) For all $z, w \in I\!\!\!\!C^{d}$ we have.
\begin{equation}
K(z,w) = K(w,z) \quad ; K(z,0) = 1 \quad and\quad K(\lambda z,w) = K(z, \lambda w),\, for\; all \; \lambda \in I\!\!\!\!C.
\label{h20}
\end{equation}
\hspace*{5mm} ii) For all $\nu \in I\!\!N^{d}, x \in I\!\!R^{d}$ and $z \in I\!\!\!\!C^{d}$, we have
\begin{equation}
|D_{z}^{\nu} K(x , z)| \leq ||x||^{|\nu|} \,\exp(||x||\, ||\mathrm{Re}\, z||),
\label{h21}
\end{equation}
and for all $x, y \in I\!\!R^{d}$ :
\begin{equation}
|K(i x , y)| \leq 1, \label{h23}
\end{equation}
with $D_{z}^{\nu} = \frac{\partial^{\nu}}{\partial z_{1}^{\nu_1}...\partial z_{d}^{\nu_d}}$ and $|\nu| = \nu_1 + ...
+ \nu_d.$\\ \hspace*{5mm} iii) For all $x, y \in I\!\!R^{d}$ and $g
\in W$ we have
\begin{equation}
K(-i x , y) = \overline{K(i x , y)}, \quad and \quad K(g x , g y)
= K( x , y).
\end{equation}
\label{P1.2}
\hspace*{5mm}iv) The function $K(x,z)$ admits for all $x \in I\!\!R^{d}$ and $z \in I\!\!\!\!C^{d}$ the following Laplace type integral representation
\begin{equation}
K(x,z) = \displaystyle\int_{I\!\!R^d} e^{<y,z>} d\mu_{x}(y), \label{753}
\end{equation}
where $\mu_{x}$ is a probability measure on $I\!\!R^d$, with support in the closed ball $B(o, ||x||)$ of center o and radius
$||x||$.(See [11]).\end{Prop}

The Dunkl intertwining operator $V_k$ is defined on $C(I\!\!R^{d})$ by
\begin{equation}
\forall x \in I\!\!R^{d}, \quad V_k f(x) = \displaystyle\int_{I\!\!R^d}
f(y)d\mu_{x}(y), \label{str}
\end{equation}
where $\mu_{x}$ is the measure given by the relation (\ref{753}).
\\ The operator $V_k$ satisfies the following properties
\\ \hspace*{5mm}i)We have $$ \forall x \in I\!\!R^{d}, \; \; \forall z \in I\!\!\!\!C^{d}, \;
\; K(x,z) = V_k (e^{<.,z>})(x). $$ \hspace*{5mm} ii)The operator
$V_k$ is a topological isomorphism from ${\cal E}(I\!\!R^{d})$ onto itself satisfying the transmutation relation
\begin{equation}
\forall x \in I\!\!R^{d}, \quad T_j { V}_k (f)(x) = {V}_k
(\displaystyle\frac{\partial}{\partial y_j}f)(x), \quad j = 1, ... , d,
f \in {\cal E}(I\!\!R^d).
\label{9}
\end{equation}
\hspace*{3mm}iii) For each $x \in I\!\!R^d$ there exists a unique distribution $\eta_x$ in ${\cal E'}(I\!\!R^d)$ with support in the ball $B(o, ||x||)$, such that for all $f$ in ${\cal E}(I\!\!R^d)$ we have
\begin{equation}
V_k^{-1}f(x) = <\eta_x, f>. \label{10}
\end{equation}
(See [16]).
\subsection{ The Dunkl transform}
\noindent{\bf{Notations}}. We denote by $L_{k}^{p}(I\!\!R^{d})$ the space of measurable functions on $I\!\!R^{d}$ such that $$
\begin{array}{crl}
||f||_{k,p}& =& (\displaystyle \int_{I\!\!R^{d}} |f(x)|^{p}
\omega_{k}(x) \;dx)^{\frac{1}{p}} < +\infty,
\quad if \; 1 \leq p
< + \infty,\\\\ ||f||_{k,\infty}& = & ess\; sup _{x \in I\!\!R^{d} }
|f(x)| < +\infty.
\end{array}
$$

\hspace*{5mm}The Dunkl transform of a function f in $D(I\!\!R^{d})$ is given by
\begin{equation}
\forall y \in I\!\!R^{d}, \quad {\cal F}_{D}(f) (y) =
\displaystyle\int_{I\!\!R^{d}}f(x) K(-iy,x) \omega_{k}(x)dx . \label{13}
\end{equation}
We give in the following some properties of this transform. (See
[7][8]).
\\\\ \hspace*{5mm} i) For all f in $L_{k}^{1}(I\!\!R^{d})$ we have
\begin{equation}
||{\cal F}_{D} (f)|| _{k, \infty} \leq ||f||_{k, 1}. \label{h26}
\end{equation}
\hspace*{5mm} ii) For all $f$ in ${\cal S}(I\!\!R^{d})$ we have
\begin{equation}
\forall y \in I\!\!R^{d}, \quad {\cal F}_{D}( T_j f)( y ) = i y_j
{\cal F}_{D}( f ) (y) \quad , j = 1, ..., d. \label{h28}
\end{equation}
\hspace*{5mm} $iii)$ For all f in $L_{k}^{1}(I\!\!R^{d})$ such that
${\cal F}_{D}(f)$ is in $L_{k}^{1}(I\!\!R^{d})$, we have the inversion formula
\begin{equation}
f(y) = \frac{c_{k}^{2}}{4^{\gamma + \frac{d}{2}}}\displaystyle
\int_{I\!\!R^{d}} {\cal F}_{D}(f)(x)
K(i x , y) \omega_{k}(x)\; dx
, \quad a.e. \label{h31}
\end{equation}
\begin{Th}\hspace*{-2mm}. The Dunkl transform ${\cal F}_{D}$
is a topological isomorphism. \\ \hspace*{5,5mm} i) From ${\cal S}(I\!\!R^{d})$ onto itself. \\ \hspace*{5,5mm} ii) From $D(I\!\!R^{d})$
onto $ {H}(I\!\!\!\!C^{d})$ (the space of entire functions on $I\!\!\!\!C^{d}$,
rapidly decreasing and of exponential type.) \\\noindent The inverse transform ${\cal F}_{D}^{- 1}$ is given by
\begin{equation}
\forall y \in I\!\!R^{d}, \quad {\cal F}_{D}^{-1}(f)(y) = \frac{c_{k}^{2}}{4^{\gamma + \frac{d}{2}}}
{\cal F}_{D}(f)(-y), \quad f \in {\cal S}(I\!\!R^{d}). \label{h32}
\end{equation}
\end{Th}
\begin{Th} \hspace*{-2mm}. i) Plancherel formula for ${\cal F}_D$ .\\ For all f in ${\cal S}(I\!\!R^{d})$ we have
\begin{equation}
\displaystyle \int_{I\!\!R^{d}} |f(x)|^{2}
\omega_{k}(x)\; dx = \displaystyle \frac {c_{k}^{2}} {4^{\gamma + \frac{d}{2} } }\displaystyle \int_{I\!\!R^{d}}
| {\cal F}_{D}(f)(\xi)|^{2} \omega_{k}(\xi)\; d\xi. \label{h33}
\end{equation}
\hspace*{5mm} ii) Plancherel theorem for ${\cal F}_{D}$.\\The renormalized Dunkl transform $f \to 2^{-( \gamma + \frac{d}{2})}
c_{k} {\cal F}_{D}(f)$ can be uniquely extended to an isometric isomorphism on $L_{k}^{2}(I\!\!R^{d})$. \label{Tp}\end{Th}
\begin{Prop}\hspace*{-2mm}.
Let $1 \leq p \leq 2$. The Dunkl transform ${\cal F}_{D}$ can be extended to a continuous mapping from $L_{k}^{p}(I\!\!R^{d})$ into
$L_{k}^{q}(I\!\!R^{d}),$ with $q$ the conjugate exponent of $p$.
\label{cher}
\end{Prop}
\begin{Def}\hspace*{-2mm}. i) The Dunkl transform of a distribution $\tau$ in ${\cal S}'(I\!\!R^{d})$ is defined by $$ <
{\cal F}_{D}(\tau), \phi > = < \tau,{\cal F}_{D }(\phi)>, \quad
\phi \in {\cal S}(I\!\!R^{d}). $$ \hspace*{5mm} ii) We define the Dunkl transform of a distribution $\tau$ in ${\cal E'}(I\!\!R^d)$ by
$$ \forall \, y \in I\!\!R^d, \; {\cal F}_{D}(\tau)(y) = \langle
\tau_x, K(-ix,y) \rangle.$$
\end{Def}
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_{D}$
is a topological isomorphism. \\ \hspace*{5,5mm} i) From ${\cal S'}(I\!\!R^{d})$ onto itself. \\ \hspace*{5,5mm}ii) From ${\cal E}'(I\!\!R^{d})$ onto ${\cal H}(I\!\!\!\!C^{d})$(the space of entire functions on $I\!\!\!\!C^{d}$, slowly increasing and of exponential type.)
\end{Th}
\hspace*{5mm}Let $\tau$ be in ${\cal S'}(I\!\!R^d)$. We define the distribution $T_j \tau$, $j=1,...,d,$ by $$<T_j \tau, \psi> = -
<\tau, T_j \psi>, \; \mbox{ for\, all} \; \psi \, \in \; {\cal S}(I\!\!R^d).$$ This distribution satisfies the following properties
\begin{eqnarray}
{\cal F}_{D}(T_j \tau) &=& i y_j {\cal F}_{D}( \tau),\quad j = 1,
..., d. \label{sol1}
\\
{\cal F}_{D}(\triangle_k \tau) &=& -|| y||^2 {\cal F}_{D}( \tau).
\label{sol2}
\end{eqnarray}
\hspace*{5mm}We consider $f$ in $L^2_k(I\!\!R^d)$.We define the distribution $T_f$ in ${\cal S'}(I\!\!R^d)$ by $$\langle T_f,\varphi
\rangle = \displaystyle\int_{I\!\!R^d}f(x)\varphi(x)\omega_k(x)dx, \; \varphi \in
{\cal S}(I\!\!R^d).$$ In the following $T_f$ will be denoted by
$f$.
\begin{Prop}\hspace*{-2mm}. Let $f$ be in $L^2_k(I\!\!R^d)$. Then we have
\begin{equation}\label{ppppp}
{\cal F}_{D}(\triangle_k f) = -||x||^2 {\cal F}_{D}(f).
\end{equation}
\end{Prop}
\noindent{\bf{Proof}}\\\hspace*{5mm} For all $\varphi \in {\cal S}(I\!\!R^d)$ we have $$\langle \triangle_k f,\varphi\rangle =
\langle f,\triangle_k \varphi\rangle = \displaystyle\int_{I\!\!R^d}f(x)\triangle_k
\varphi(x)\omega_k(x)dx.$$ But $$\begin{array}{lll}
\langle {\cal F}_{D}(\triangle_k f),\varphi\rangle & = & \langle \triangle_k f,
{\cal F}_{D}(\varphi)\rangle = \langle f,\triangle_k {\cal F}_{D}(\varphi)\rangle\\
& = & \displaystyle\int_{I\!\!R^d}f(y){\cal F}_{D}(-||x||^2 \varphi(.)) (y) \omega_k(y)dy\\
& = & -\displaystyle\int_{I\!\!R^d}{\cal F}_{D}(f)(x)||x||^2 \varphi(x)
\omega_k(x)dx \\ &=& \langle -|| x||^2 {\cal F}_{D}(f),\varphi\rangle.
\end{array}$$
Thus $${\cal F}_{D}(\triangle_k f) = -||x||^2 {\cal F}_{D}(f).$$
\noindent{\bf{Notations.}} We denote by\\
\hspace*{5mm} - $L^2_{k,c}(I\!\!R^d)$ the space of functions in $L^2_{k}(I\!\!R^d)$
with compact support.\\
\hspace*{5mm} - ${\cal H}_{L^2_k}(I\!\!\!\!C^d)$ the space of entire functions $f$
on $I\!\!\!\!C^d$ of exponential type such that $f_{|I\!\!R^d}$ belongs to $L^2_{k}(I\!\!R^d)$.
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is bijective from
$L^2_{k,c}(I\!\!R^d)$ onto ${\cal H}_{L^2_k}(I\!\!\!\!C^d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm}i) We consider the function
$f$ on $I\!\!\!\!C^d$ given by \begin{equation} \forall \, z \in I\!\!\!\!C^d, \;
f(z) = \displaystyle\int_{I\!\!R^d}g(x) K(-ix,z)
\omega_k(x)dx,\label{159753}\end{equation} with $g \in L^2_{k,c}(I\!\!R^d)$.\\ By differentiation under the integral sign and by using the inequality (\ref{h21}), we deduce that the function
$f$ is entire on $I\!\!\!\!C^d$ and of exponential type.\\ On the other hand the relation (\ref{159753}) can also be written in the form $$ \forall \; y \in I\!\!R^d, \; f(y) = {\cal F}_{D}(g)(y).$$ Thus from Theorem \ref{Tp} the function
$f_{|I\!\!R^d}$ belongs to $L^2_{k}(I\!\!R^d)$. Thus $f \in {\cal H}_{L^2_k}(I\!\!\!\!C^d)$.
\\ \hspace*{5mm}ii) Reciprocally let $\psi $ be in ${\cal H}_{L^2_k}(I\!\!\!\!C^d)$. From Theorem 2.6 ii) there exists $S \in {\cal E'}(I\!\!R^d)$ with support in the ball $B(o,a)$ of center $o$ and radius $a$, such that
\begin{equation}\label{tgvam}
\forall \, y \in I\!\!R^d, \; \psi(y) = \langle S_x,
K(-ix,y)\rangle.
\end{equation}
On the other hand as $\psi_{|I\!\!R^d}$ belongs to $L^2_{k}(I\!\!R^d)$,
then from Theorem \ref{Tp} there exists \linebreak $h \in L^2_{k}(I\!\!R^d)$ such that
\begin{equation}\label{tgvam1}
\psi_{|I\!\!R^d} = {\cal F}_{D}(h).
\end{equation}
Thus from (\ref{tgvam}), for all $\varphi \in D(I\!\!R^d)$ we have
$$\begin{array}{lll}
\displaystyle\int_{I\!\!R^d}\psi(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy & = &
\langle S_x,\displaystyle\int_{I\!\!R^d}
K(-ix,y)\overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy\rangle.
\end{array}$$
Thus using (\ref{h32}) we deduce that
\begin{equation}\label{hhhhh}
\displaystyle\int_{I\!\!R^d}\psi(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy = \frac{4^{\gamma +
\frac{d}{2}}}{c_k^2}
\langle S,\varphi\rangle.
\end{equation}
On the other hand (\ref{tgvam1}) implies $$\displaystyle\int_{I\!\!R^d}\psi(y)
\overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy =
\displaystyle\int_{I\!\!R^d}{\cal F}_{D}(h)(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy.$$ But from Theorem 2.2 we deduce that
\begin{equation}\label{uuuuu}\begin{array}{lll}
\displaystyle\int_{I\!\!R^d}{\cal F}_{D}(h)(y) \overline{{\cal F}_{D}(\varphi)(y)}\omega_k(y)dy &=& \frac{4^{\gamma +
\frac{d}{2}}}{c_k^2}\displaystyle\int_{I\!\!R^d}h(y) \varphi(y)\omega_k(y)dy
\nonumber\\ &=& \frac{4^{\gamma + \frac{d}{2}}}{c_k^2}\langle T_{h\omega_k},\varphi\rangle.
\end{array}
\end{equation}
Thus the relations (\ref{hhhhh}),(\ref{uuuuu}) imply $$S =
T_{h\omega_k}.$$ This relation shows that the support of $h$ is compact. Thus $h \in L^2_{k,c}(I\!\!R^d)$.

\subsection{The Dunkl translation operator and the Dunkl convolution product}

\begin{Def}\hspace*{-2mm}. Let $y \, \in I\!\!R^{d}$. The Dunkl translation operator $f \mapsto \tau_y f$ is defined on ${\cal S}(I\!\!R^d)$ by
\begin{equation} \forall \, x \in I\!\!R^d, \;
{\cal F}_D (\tau_{y}f)(x)= K(ix,y){\cal F}_D (f)(x). %
\label{2.38}%
\end{equation}
\end{Def}
\noindent{\bf{Example}} \\ \hspace*{5mm}Let $t > 0$, we have
\begin{equation}\label{tgsz}\forall \, x \in \, I\!\!R^d,
\;\tau_{x}(e^{-t||\xi||^2})(y) = \frac{M_k}{t^{\gamma +
\frac{d}{2}}}
K(\frac{x}{\sqrt{2t}},\frac{y}{\sqrt{2t}})
e^{-\frac{||x||^2 + ||y||^2}{4t}},
\end{equation}
with $M_k = (2^{\gamma+\frac{d}{2}}c_k)^{-1}$.\\
\noindent{\bf{Remark}}
\\ \hspace*{5mm} The operator $\tau_y$, $y
\in I\!\!R^d$, can also be defined on ${\cal E}(I\!\!R^d)$ by
\begin{equation}
\forall \, x \in I\!\!R^d, \;\tau_{y}f(x)= (V_k)_x (V_k)_y[(V_k)^{-1}(f)(x+y)]. %
\label{2.389}%
\end{equation}
(See \cite{T5}).\\ \hspace*{5mm} At the moment an explicit formula for the Dunkl translation operator is known only in the following two cases. \\
\underline{1$^{st}$ case}: $d = 1$ and $W = {\bf Z}_2$. \\ For all $f
\in C(I\!\!R)$ we have $$\begin{array}{ccc}
\forall \, x \in I\!\!R, \tau_{y}f(x) & = & \frac{1}{2}\displaystyle\int_{-1}^{1}f(\sqrt{x^2 + y^2 -2xyt})
(1+\frac{x-y}{\sqrt{x^2 + y^2 -2xyt}})\Phi_k(t)dt\\
& + & \frac{1}{2}\displaystyle\int_{-1}^{1}f(-\sqrt{x^2 + y^2 -2xyt})
(1-\frac{x-y}{\sqrt{x^2 + y^2 -2xyt}})\Phi_k(t)dt,
\end{array}$$ where $$\Phi_k(t) = \frac{\Gamma(k+\frac{1}{2})}{\sqrt{\pi}%
\Gamma(k)} (1+t)(1-t^2)^{k-1}.$$ Moreover for all $f \in L^p_k(I\!\!R)$, $1 \leq p \leq \infty$, we have
$$ ||\tau_{y}f||_{k,p} \leq 3 ||f||_{k,p}, \quad 1 \leq p \leq \infty. $$ (See [10][13]).\\ \underline{2$^{nd}$ case}: For all $f \in {\cal E}(I\!\!R^d)$ radial we have $$ \forall \, x \in
I\!\!R^d, \; \tau_{y}f(x) = V_k [f_0 (\sqrt{||x||^2 + ||y||^2 +2
\langle x,.\rangle })](y),$$ with $f_0$ the function on
$[0,+\infty[$ given by $f(x) = f_0(||x||)$. \\ Moreover for all $f
\in L^p_k(I\!\!R^d)$, $1 \leq p \leq \infty$, we have
$$ ||\tau_{y}f||_{k,p} \leq ||f||_{k,p}, \quad 1 \leq p \leq \infty. $$ (See [11][13]).\\ \hspace*{5mm}Using the Dunkl translation operator, we define the Dunkl convolution product of functions as follows (See [11][17]).
\begin{Def}\hspace*{-2mm}. For $f,g$ in $D(I\!\!R^d)$, we define the Dunkl convolution product by%
\begin{equation}
\forall \, x \in I\!\!R^d, \; f*_{D}g(x)=\int_{I\!\!R^d}\tau_{x}f(-y)g(y)\omega_{k}(y)\,dy.\label{2.42}%
\end{equation}
\end{Def}
This convolution is commutative and associative and satisfies the following properties. (See [13]).\\
\begin{equation}\hspace*{-97mm}i) {\cal F}_D (f*_{D}g) = {\cal F}_D (f){\cal F}_D (g).\end{equation}
\hspace*{5mm}ii) Let $1\leq p,q,r\leq+\infty,\;$such that $\frac{1}%
{p}+\frac{1}{q}-\frac{1}{r}=1.\;$If $f\;$is in $L^{p}_{k}(I\!\!R^d)$
radial and $g$ an element of $L^{q}_{k}(I\!\!R^d),\;$ then $f*_{D}g\;$
belongs to $L^{r}_{k}(I\!\!R^d)\;$ and we have
\begin{equation}
\left\| f*_{D}g\right\| _{k,r}\leq \left\| f\right\| _{k,p}
\left\| g\right\| _{k,q}.\label{2.43999}%
\end{equation}
\hspace*{5mm}iii) Let $d = 1$ and $W = {\bf Z}_2$. For all
$f\;$ in $L^{p}_{k}(I\!\!R)$ and $g\;$ an element of
$L^{q}_{k}(I\!\!R)$, the function $f*_{D}g$ belongs to
$L^{r}_{k}(I\!\!R)\;$ with $\frac{1}%
{p}+\frac{1}{q}-\frac{1}{r}=1$, and we have
\begin{equation}
\left\| f*_{D}g\right\| _{k,r}\leq 3\left\| f\right\| _{k,p}
\left\| g\right\| _{k,q}.\label{2.43}%
\end{equation}

\section{Functions with compact spectrum } \hspace*{5mm} First we recall that the spectrum of a function is the support of its Dunkl transform. \\We begin this section by the following definition.
\begin{Def}\hspace*{-2mm}.
i) We define the support of $ g \in L^2_k(I\!\!R^d)$, denoted by
$\mbox{supp }\, g$,
to be the smallest closed set outside which the function $g$
vanishes almost everywhere. \\ \hspace*{5mm}ii) We denote by $$R_g = \displaystyle \sup_{ \lambda \in \mathrm{supp}\, g}
||\lambda||,$$ the radius of the support of $g$.\\
\noindent{{\bf{ Remark}}}\\ \hspace*{5mm} It is clear that $R_g$
is finite if and only if, $g$ has compact support.
\end{Def}
\begin{Prop}\hspace*{-2mm}. Let $g \in L^2_k(I\!\!R^d)$ such that for all $n \in I\!\!N$, the function
$||\lambda||^{2n}g(\lambda)$ belongs to $ L^2_k(I\!\!R^d)$. Then
\begin{equation}\label{g}
R_g = \lim_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}.
\end{equation}
\end{Prop}
\noindent{\bf{Proof}}\\ \hspace*{5mm} We suppose that
$||g||_{k,2} \neq 0$, otherwise $R_g = 0$ and formula (\ref{g})
is trivial.\\ \hspace*{5mm}Assume now that $g$ has compact support with $R_g
> 0$. Then $$ \left\{\displaystyle\int_{I\!\!R^d} ||\lambda||^{4n} |g(\lambda)|^2
\omega_k(\lambda) d\lambda\right\}^{\frac{1}{4n}} \leq
\left\{\displaystyle\int_{||\lambda|| \leq R_g}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}R_g.$$ Thus we deduce that
$$\limsup_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \leq
\limsup_{n \to \infty}\left\{\displaystyle\int_{||\lambda|| \leq R_g}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}R_g = R_g.$$ On the other hand, for any positive
$\varepsilon$ we have
$$\displaystyle\int_{R_g - \varepsilon \leq ||\lambda|| \leq R_g}|g(\lambda)|^2
\omega_k(\lambda)d\lambda > 0.$$ Hence
$$\liminf_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq
\liminf_{n \to \infty}\left\{\displaystyle\int_{R_g - \varepsilon \leq ||\lambda|| \leq R_g}
||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq R_g -
\varepsilon.$$
Thus
$$R_g = \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}}.$$
\hspace*{5mm} We prove now the assertion in the case where $g$
has unbounded support. Indeed, for any positive $N$, we have
$$\displaystyle\int_{ ||\lambda|| \geq N}|g(\lambda)|^2
\omega_k(\lambda)d\lambda > 0.$$ Thus
$$\liminf_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq
\liminf_{n \to \infty}\left\{\displaystyle\int_{ ||\lambda|| \geq N}
||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} \geq N.$$
This implies that $$\liminf_{n \to \infty}\left\{\displaystyle\int_{I\!\!R^d}||\lambda||^{4n}|g(\lambda)|^2
\omega_k(\lambda)d\lambda\right\}^{\frac{1}{4n}} = \infty.$$
\noindent{\bf{Notations.}} We denote by\\
\hspace*{5mm} - $L^2_{k,R}(I\!\!R^d) := \{g \in L^2_{k,c}(I\!\!R^d) / R_g =
R\}$, for $R \geq 0$.\\ \hspace*{5mm} - $D_{R}(I\!\!R^d) := \{g \in D(I\!\!R^d) / R_g =
R\}$, for $R \geq 0$.
\begin{Def}\hspace*{-2mm}.
We define the Paley-Wiener spaces $PW^2_k(I\!\!R^d)$ and $PW^2_{k,R}(I\!\!R^d)$ as follows\\
i) $PW^2_k(I\!\!R^d)$ is the space of functions $f \in {\cal E}(I\!\!R^d)$ satisfying\\
\hspace*{5mm} a) $\triangle_k^n f \in L^2_{k}(I\!\!R^d)$ for all
$n \in I\!\!N$.\\ \hspace*{5mm} b) $R_f^{\triangle_k} := \displaystyle \lim_{n \to \infty}
||\triangle_k ^n f||_{k,2}^{\frac{1}{2n}} < \infty.$\\
ii) $PW^2_{k,R}(I\!\!R^d) := \{f \in PW^2_k(I\!\!R^d) / R_f^{\triangle_k}
= R\}$.
\end{Def}

The real $L^2$-Paley-Wiener theorem for the Dunkl transform can be formulated as follows
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is a bijection \\
\hspace*{5mm}i) from $PW^2_{k,R}(I\!\!R^d)$ onto
$L^2_{k,R}(I\!\!R^d)$.\\ \hspace*{5mm} ii) from $PW^2_k(I\!\!R^d)$
onto $L^2_{k,c}(I\!\!R^d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} i) Let $g \in PW^2_{k,R}(I\!\!R^d)$. Then from Proposition 2.7 the function ${\cal F}_D(\triangle_k^n g)(\xi) = (-1)^n ||\xi||^{2n}{\cal F}_D(g)(\xi)
$ belongs to $ L^2_{k}(I\!\!R^d)$ for all $n \in I\!\!N$. On the other hand from Theorem 2.3 we deduce that $$ \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\xi||^{4n}|{\cal F}_D(g)(\xi)|^2
\omega_k(\xi)d\xi\right\}^{\frac{1}{4n}} = \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}|\triangle_k^n g(x)|^2
\omega_k(x)dx\right\}^{\frac{1}{4n}} = R.$$
Thus using Proposition 3.2 we conclude that ${\cal F}_D(g)$ has compact support with $ R_{{\cal F}_D(g)} = R$.\\ \hspace*{5mm}
Conversely, let $f \in L^2_{k,R}(I\!\!R^d)$. Then $||\xi||^n f(\xi) \in L^1_k(I\!\!R^d)$ for any $n \in I\!\!N$, and ${\cal F}_D^{-1} f \in {\cal E}(I\!\!R^d)$. On the other hand from Theorem 2.3 we have $$ \lim_{n
\to \infty}\left\{\displaystyle\int_{I\!\!R^d}|\triangle_k^n ({\cal F}_D^{-1}
f)(x)|^2
\omega_k(x)dx\right\}^{\frac{1}{4n}} = \lim_{n \to
\infty}\left\{\displaystyle\int_{I\!\!R^d}||\xi||^{4n}|f(\xi)|^2
\omega_k(\xi)d\xi\right\}^{\frac{1}{4n}} = R.$$
Thus ${\cal F}_D^{-1}
(f) \in PW^2_{k,R}(I\!\!R^d)$.\\ \hspace*{5mm}ii) We deduce ii) from i).
\begin{Cor}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is a bijection from $PW^2_{k}(I\!\!R^d)$ onto
${\cal H}_{L^2_k}(I\!\!\!\!C^d)$.
\end{Cor}
\noindent{\bf{Proof}}\\ \hspace*{5mm} We deduce the result from Theorem 3.4 ii) and Theorem 2.8.
\begin{Def}\hspace*{-2mm}.
i) The Paley-Wiener space $PW_k(I\!\!R^d)$ is the space of functions $f \in {\cal E}(I\!\!R^d)$ satisfying\\
\hspace*{5mm} a) $(1+||x||)^m \triangle_k^n f \in L^2_{k}(I\!\!R^d)$ for all
$n$,$m$ $\in$ $I\!\!N$.\\ \hspace*{5mm} b) $R_f^{\triangle_k} := \lim_{n \to \infty}
||\triangle_k ^n f||_{k,2}^{\frac{1}{2n}} < \infty.$\\
\hspace*{5mm} ii) We have $PW_{k,R}(I\!\!R^d) := \{f \in PW_k(I\!\!R^d) /
R_f^{\triangle_k} = R\}$, for $R \geq 0$.
\end{Def}
\noindent{{\bf{Remark}}}\\ \hspace*{5mm}
We notice that the only difference between $PW_k^2(I\!\!R^d)$ and
$PW_k(I\!\!R^d)$ is the extra requirement of polynomial decay to help ensure that ${\cal F}_D (f) \in {\cal E}(I\!\!R^d)$.\\

The real Paley-Wiener theorem for the Dunkl transform of functions in the preceding spaces is the following
\begin{Th}\hspace*{-2mm}.
The Dunkl transform ${\cal F}_D$ is a bijection \\ \hspace*{5mm}i) from $PW_{k,R}(I\!\!R^d)$ onto
$D_{R}(I\!\!R^d)$.\\\hspace*{5mm}ii) from $PW_k(I\!\!R^d)$
onto $D(I\!\!R^d)$.\\
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} i) Let $g \in PW_{k,R}(I\!\!R^d)
\subset PW^2_{k,R}(I\!\!R^d)$. Then ${\cal F}_D (g) \in {\cal E}(I\!\!R^d)$ since $g$ has polynomial decay, and by Theorem 3.4 the function ${\cal F}_D (g)$ has compact support with $R_{{\cal F}_D
(g)} = R$.\\ \hspace*{5mm} Conversely, let $f \in D_R (I\!\!R^d)$. Then
${\cal F}_D^{-1}(f) \in {\cal S}(I\!\!R^d)$ and ${\cal F}_D^{-1}(f)
\in PW^2_{k,R}(I\!\!R^d)$ by Theorem 3.4. As ${\cal F}_D^{-1}(f)$ belongs to ${\cal S}(I\!\!R^d)$, it also satisfies the polynomial decay condition a), hence ${\cal F}_D^{-1}(f) \in PW_{k,R}(I\!\!R^d)$.\\ \hspace*{5mm}ii) We deduce the result from i).
\section{Dunkl transform of functions, with polynomial domain support} Let $P(x)$ be a non-constant polynomial.
\begin{Th}\hspace*{-2mm}. For any function $f \in {\cal S}(I\!\!R^d)$ the following relation holds
\begin{equation}\label{aze}
\lim_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} = \sup_{y \in supp {\cal F}_D(f)}|P(y)|,
\;1
\leq p \leq \infty,
\end{equation}
with $T = (T_1,...,T_d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} We consider $ f \neq 0$ in
${\cal S}(I\!\!R^d)$. Set $q = \frac{p}{p-1}$ if $1 < p < \infty$ and
$q = 1$ or $\infty$ if $p = \infty$ or $1$.\\ The proof is divided in several steps.\\ In the following three steps we suppose that
\begin{equation}\label{wahid}
0 < \sup_{y \in supp \, {\cal F}_D(f)}|P(y)| < \infty.\end{equation}
\\
{\bf{\underline{First step}}}: In this step we shall prove that
$$\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp {\cal F}_D(f)}|P(y)|, \; 1 \leq p \leq \infty.$$
$\bullet$ Let $2
\leq p < \infty$.
Applying Proposition \ref{cher}
we obtain
\begin{eqnarray}\label{l15}
||P(iT)^n f||_{k,p} &\leq& C ||P(\xi)^n {\cal F}_D(f)||_{k,q},\\
\\
& \leq & C (\sup_{y \in supp {\cal F}_D(f)}|P(y)|)^n || {\cal F}_D(f)
||_{k,q}. \nonumber
\end{eqnarray}
Thus
\begin{equation}\label{l16}
\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp {\cal F}_D(f)}|P(y)|.
\end{equation}

\noindent$\bullet$ Suppose now that $1 \leq p < 2$.
H\"older's inequality gives
\begin{equation}\label{sour}
||f||_{k,p}^{p} =
\displaystyle\int_{I\!\!R^d}(1+||x||^{2})^{-rp}|(1+||x||^{2})^{r}f(x)|^{p}\omega_k(x)dx
\leq
||(1+||x||^{2})^{r}f||_{k,2}^{p}||(1+||x||^{2})^{-rp}||_{k,\frac{2}{2-p}}.
\end{equation}
$$ \leq C ||(1+||x||^{2})^{r}f||_{k,2}^{p}, $$ for $r > 2\gamma
+d$.\\ Thus, from Proposition 2.7 we obtain $$ ||f||_{k,p}^{p}
\leq C ||(I - \triangle_k )^{r}[{\cal F}_D(f)]||_{k,2}^{p}.$$\\
Consequently for all $n \in I\!\!N$, we deduce that
\begin{equation}\label{kopm}
||P^n (iT) f||_{k,p} \leq C^{\frac{1}{p}}||(I - \triangle_k
)^{r}[P^n (\xi){\cal F}_D(f)]||_{k,2}.
\end{equation}
On the other hand, from Proposition 5.1 of [9] we have the following relation:

For all $\mu \in I\!\!N^d\backslash\{0\}$ there exist: $t_p ^0, t_p ^1
\in [0,1]$,$p=1,...,|\mu|-1$, such that for all $u \in {\cal E}(I\!\!R^d)$ we have
\begin{eqnarray}\label{65}
T^{\mu} u(x) & = & D^{\mu} u(x) + \displaystyle\sum_{\alpha \in R_+
}\{\displaystyle\sum_{|\beta|=|\mu|}\displaystyle\sum_{p=1}^{|\mu|-1}
Q_{\mu}(t_1 ^0,...,t_p ^0) D^{\beta}u\big(x - S_{\mu}(t_1 ^0
,...,t_p ^0 )<\alpha,x>\alpha\big) \nonumber
\\
& + & \displaystyle\sum_{|\beta'|=|\mu|}
P_{\mu}(t_1 ^1,...,t_{|\mu|-1}^1)
D^{\beta'}u\big(x - \widetilde{S}_{\mu}(t_1 ^1
,...,t_{|\mu|-1}^1)<\alpha,x>\alpha\big)
\},
\end{eqnarray} where
$Q_{\mu}(t_1,...,t_p),S_{\mu}(t_1,...,t_p)$, $p=1,...,|\mu|$ and
$P_{\mu}(t_1,...,t_{|\mu|-1}),\widetilde{S}_{\mu}(t_1,...,t_{|\mu|-1})$
are polynomials of degree at most $|\mu|$,with respect to each variable.\\ From this relation and by induction one can show that
\begin{equation}\label{htfv}
||(I - \triangle_k )^{r}[P^n (\xi){\cal F}_D(f)(\xi)]||_{k,2} \leq C n^{2r} ||P^{n-2r}(\xi)\varphi_n(\xi)||_{k,2}, \; n > 2r,
\end{equation}
with $supp \, \varphi_n \subset supp \,{\cal F}_D(f)$ and
$||\varphi_n||_{k,2} \leq C_1 $, where $C_1$ is a constant independent of $n$.\\ Hence, from the previous inequalities we deduce that
\begin{eqnarray}\label{nbvc}\nonumber
||P^n (iT) f||_{k,p} &\leq& C^{\frac{1}{p}}n^{2r}
||P^{n-2r}(\xi)\varphi_n(\xi)||_{k,2} \leq C^{\frac{1}{p}}n^{2r}
\sup_{y \in supp {\cal F}_D(f)}|P(y)|^{n-2r}
||\varphi_n(\xi)||_{k,2}\\ \nonumber\\ &\leq& C^{\frac{1}{p}}C_1 n^{2r}\sup_{y \in supp {\cal F}_D(f)}|P(y)|^{n-2r} .
\end{eqnarray}
Thus
\begin{equation}\label{lon}
\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp {\cal F}_D(f)}|P(y)|.
\end{equation}
$\bullet$ Let now $p = \infty$. From the relation (\ref{h32}) we have $$
||f||_{\infty,k} \leq \frac{c_k^2}{4^{\gamma+ \frac{d}{2}}} ||{\cal F}_D(f)||_{k,1}.
$$
On the other hand, from the Cauchy-Schwarz inequality we obtain
$$
||{\cal F}_D(f)||_{k,1} \leq C_0 ||(1+||\xi||^2)^{\frac{2\gamma+d}{2}}{\cal F}_D(f)(\xi)||_{k,2},
$$
where $C_0$ is a positive constant.\\
Combining the previous inequalities and replacing $f$ by $P(iT)^n f$, we deduce that there exists a positive constant $C$ such that
\begin{equation}\label{hgfd}
||P(iT)^n f||_{k,\infty} \leq C ||P^n (\xi)(1+||\xi||^2)^{\frac{2\gamma+d}{2}}{\cal F}_D(f)(\xi)||_{k,2}.
\end{equation}
Consequently,
\begin{equation}\label{szaq}
\limsup_{n \to \infty} ||P(iT)^n f||_{k,\infty}^{\frac{1}{n}} \leq
\sup_{y \in supp \, (1+||\xi||^2)^{\frac{2\gamma+d}{2}}{\cal F}_D(f)}|P(y)| = \sup_{y \in supp \, {\cal F}_D(f)}|P(y)|.
\end{equation}
Thus from (\ref{l16}), (\ref{lon}) and (\ref{szaq}) we have
\begin{equation}\label{pagyz}
\limsup_{n \to \infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)|, \; 1 \leq p \leq \infty.
\end{equation}
{\bf{ \underline{Second step}}}: In this step we want to prove that $$ \lim_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} =
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)|.$$
For any $\varepsilon$, $0 < \varepsilon < \sup_{y \in supp \, {\cal F}_D(f)}|P(y)|$, there exists a point $x_0 \in supp \, {\cal F}_D(f)$ such that
$$
|P(x_0)| > \sup_{y \in supp \, {\cal F}_D(f)}|P(y)| - \frac{\varepsilon}{2}
$$ As $P$ is a continuous function, there exists a neighborhood
$U_{x_0}$ such that $$
|P(x)| > \sup_{y \in supp \, {\cal F}_D(f)}|P(y)| - \varepsilon, \; x \in U_{x_0}
$$ From Theorem 2.3 we deduce that $$
\begin{array}{lll}
||P(iT)^n f||_{k,2} &=& \frac{c_k}{2^{\gamma+
\frac{d}{2}}}||P(\xi)^n {\cal F}_D(f)||_{k,2} \\ &\geq&
\frac{c_k}{2^{\gamma+ \frac{d}{2}}} ||P(\xi)^n {\cal F}_D(f)1_{U_{x_0}}||_{k,2},\end{array} $$ where $1_{U_{x_0}}$ is the characteristic function of $U_{x_0}$.\\ Thus $$ ||P(iT)^n f||_{k,2} \geq \frac{c_k}{2^{\gamma+ \frac{d}{2}}}(\sup_{y \in supp {\cal F}_D(f)}|P(y)| - \varepsilon)^n || {\cal F}_D(f) 1_{U_{x_0}}||_{k,2}.$$
This inequality implies,
\begin{equation}\label{usaki}
\liminf_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} \geq
(\sup_{y \in supp {\cal F}_D(f)}|P(y)| - \varepsilon) \lim_{n \to
\infty}|| {\cal F}_D(f) 1_{U_{x_0}}||_{k,2}^{\frac{1}{n}} =
\sup_{y \in supp \, {\cal F}_D(f)}(|P(y)| - \varepsilon).
\end{equation}
But $\varepsilon$ can be chosen arbitrarily small, thus from
(\ref{pagyz}) and (\ref{usaki}) the relation (\ref{aze}) follows for
$p = 2$.\\ \noindent{\bf{ \underline{Third step}}}: In this step we shall prove that $$ \liminf_{n \to \infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \geq
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)|, \;1 \leq p \leq \infty.$$
Since $f \in {\cal S}(I\!\!R^d)$, the iteration of the relation (7)
implies the relation
\begin{equation}\label{bhjt}
\displaystyle\int_{I\!\!R^d} \overline{P^n (-iT) f(x)} P^n (iT) f(x)\omega_k(x)dx
= \displaystyle\int_{I\!\!R^d}\overline{f(x)} P^{2n} (iT) f(x)\omega_k(x)dx.
\end{equation}
Hence, by H\"older's inequality,
\begin{equation}\label{kkkki}
||P^n (iT) f||_{k,2}^2 \leq ||f||_{k,q}||P^{2n} (iT) f||_{k,p}.
\end{equation}
Consequently
\begin{equation}\label{kkkki1}
\lim_{n \to \infty}||P^n (iT) f||_{k,2}^{\frac{1}{n}} \leq
(\lim_{n \to \infty}||f||_{k,q}^{\frac{1}{2n}})\liminf_{n \to
\infty}||P^{2n} (iT) f||_{k,p}^{\frac{1}{2n}} = \liminf_{n \to
\infty}||P^{2n} (iT) f||_{k,p}^{\frac{1}{2n}}.
\end{equation}
Applying now the relation (40) with $p = 2$, we conclude that
\begin{equation}\label{ijc}
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)| = \lim_{n \to \infty}||P^n (iT)
f||_{k,2}^{\frac{1}{n}}\leq \liminf_{n \to
\infty}||P^{2n} (iT) f||_{k,p}^{\frac{1}{2n}}.
\end{equation}
We replace in formula (\ref{kkkki}) the function $f$ by $P(iT) f$ and we obtain
\begin{equation}\label{pluyt}
||P^{n+1} (iT) f||_{k,2}^2 \leq ||P(iT)f||_{k,q}||P^{2n+1} (iT)
f||_{k,p}.
\end{equation}
Thus
\begin{equation}\label{tfgr}
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)| = \lim_{n \to \infty}||P^{n+1} (iT)
f||_{k,2}^{\frac{1}{n+1}}\leq \liminf_{n \to
\infty}||P^{2n+1} (iT) f||_{k,p}^{\frac{1}{2n+1}}.
\end{equation}
Using (\ref{ijc}) and (\ref{tfgr}) we deduce that
\begin{equation}\label{yhwa}
\sup_{y \in supp \, {\cal F}_D(f)}|P(y)| \leq \liminf_{n \to
\infty}||P^{n} (iT) f||_{k,p}^{\frac{1}{n}}.
\end{equation}
Then formulas (\ref{yhwa}) and (53) give (40). Thus we have proved the theorem under the condition (\ref{wahid}).\\
\noindent{\bf{ \underline{Fourth step}}}: Suppose now $\sup_{y \in supp {\cal F}_D(f)}|P(y)| = +\infty.$ Then for any $N
> 0$ there exists a point $x_0 \in supp {\cal F}_D(f)$ such that
$|P(x_0)| \geq 2N$. Since $P$ is a continuous function there exists a neighborhood $U_{x_0}$ of $x_0$ on which $|P(x)| > N$.
Similarly to the calculation in the second step, we obtain
$$
\begin{array}{lll} \liminf_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} &\geq& \frac{c_k^2}{4^{\gamma+
\frac{d}{2}}} \liminf_{n \to \infty}||P^n(\xi) {\cal F}_D(f)1_{U_{x_0}}||_{k,2}^{\frac{1}{n}} ,\\
\\
& \geq & N \liminf_{n \to \infty}|| {\cal F}_D(f) 1_{U_{x_0}}||_{k,2}^{\frac{1}{n}} = N. \end{array}$$ Since $N$ can be chosen
arbitrarily large, we obtain $$\lim_{n \to \infty} ||P(iT)^n f||_{k,2}^{\frac{1}{n}} = \infty. $$ Finally if $\sup_{y \in supp
{\cal F}_D(f)}|P(y)| = 0$ the identity (40) is clear for $p =
2$.\\ Hence the proof of the theorem is finished.
\begin{Def}\hspace*{-2mm}.
Let $P$ be a non-constant polynomial and $U_P = \{x \in I\!\!R^d, \,
|P(x)| \leq 1 \}$. The set $U_P$ is called a polynomial domain in
$I\!\!R^d$.
\end{Def}
\noindent{\bf{Remark}} \\ \hspace*{5mm} A disc is a polynomial domain. A polynomial domain may be unbounded and nonconvex, for example $U = \{x \in I\!\!R^d, \, |x_1... x_d| \leq 1 \}$.\\

We have the following result.
\begin{Cor}\hspace*{-2mm}. Let $f \in {\cal S}(I\!\!R^d)$.
The Dunkl transform ${\cal F}_D(f)$ vanishes outside a polynomial domain $U_P$, if and only if,
\begin{equation}\label{l14}
\limsup_{n\to\infty} ||P(iT)^n f||_{k,p}^{\frac{1}{n}} \leq 1, \; 1
\leq p \leq \infty.
\end{equation}
\end{Cor}
\noindent{\bf{Remark}} \\ \hspace*{5mm}i) If we take $P(y) = -
||y||^2$, then $P(iT) = \triangle_k$, and Theorem 4.1 and Corollary 4.3 characterize functions such that the support of their Dunkl transform is a ball.\\ \hspace*{5mm}ii) Theorem 4.1 and Corollary 4.3 generalize also the result obtained in [3].
\section{ Dunkl transform of functions vanishing on a Ball} The following theorem gives the radius of the largest disc on which the Dunkl transform of functions in $L^2_{k}(I\!\!R^d)$ vanishes everywhere.
\begin{Th}\hspace*{-2mm}.
Let $f \in L^2_{k}(I\!\!R^d)$. We consider the sequence
\begin{equation}\label{l}
f_n(x) = E_n *_D f(x), \; x \in I\!\!R^d, \, n \in I\!\!N
\backslash\{0\}.
\end{equation}
where $$ E_n(y) = \frac{c_k}{(4n)^{\gamma +
\frac{d}{2}}}e^{-\frac{||y||^2}{4n}}$$
Then
\begin{equation}\label{ll}
\lim_{n \to \infty}\sqrt{-\frac{1}{n}\ln ||f_n||_{k,2}} =
\lambda_{{\cal F}_D(f)},
\end{equation}
where
\begin{equation}\label{lll}
\lambda_{{\cal F}_D(f)} = \displaystyle\inf
\displaystyle\left\{||\xi||, \; \xi \in supp {\cal F}_D(f)\right\}.
\end{equation}\label{TP}
\end{Th}
\noindent{\bf{Remark}}\\ \hspace*{5mm} The function $E_n$ is the Gauss kernel associated with Dunkl operators. From [11] p. 2424,
we have
\begin{equation}\label{zzzzz}
\forall \, x \in I\!\!R^d, \; {\cal F}_D (E_n)(x) = e^{-n||x||^2}.
\end{equation}

\noindent{\bf{Proof of Theorem \ref{TP}}}\\ \hspace*{5mm} First we remark that from (37) the function $f_n$ is well defined.
We assume that $||f||_{k,2}
> 0
$, otherwise the result is trivial. To prove (\ref{ll}) it is sufficient to verify the equivalent identity
\begin{equation}\label{llll}
\lim_{n \to \infty} ||f_n||_{k,2}^{\frac{1}{n}} = \exp( -
\lambda_{{\cal F}_D(f)}^2).
\end{equation}
Using (\ref{zzzzz}) and (37) we deduce that the Dunkl transform of
$f_n(x)$ is $ \exp(-n||\xi||^2){\cal F}_D(f)(\xi)$. Then by applying Theorem 2.3 we obtain
\begin{eqnarray}\label{lllll}
||f_n||_{k,2} &=&
\frac{c_k}{2^{\gamma+\frac{d}{2}}}||\exp(-n||\xi||^2){\cal F}_D(f)(\xi)||_{k,2} \\ &=&
\frac{c_k}{2^{\gamma+\frac{d}{2}}}||f||_{k,2} \{\displaystyle\int_{supp {\cal F}_D(f)}\exp(-2n||\xi||^2)\displaystyle\frac{|{\cal F}_D(f)(\xi)|^2}{||f||_{k,2}^2}
\omega_k(\xi)d\xi\}^{\frac{1}{2}}.\nonumber
\end{eqnarray}
On the other hand it is known that if $m$ is the Lebesgue measure on $I\!\!R^d$ and $U$ a subset of $I\!\!R^d$ such that $m(U) = 1$, then for all $\phi$ in the Lebesgue space $L^p(U,dm)$, $1 \leq p \leq
+\infty$, we have
\begin{equation}\label{l6}
\lim_{p\to \infty}||\phi||_{L^p(U;dm)} =
||\phi||_{L^\infty(U;dm)}.
\end{equation}
By applying formula (\ref{l6}) with $$U = supp {\cal F}_D(f),
\;\phi = \exp(-||\xi||^2), \; p = 2n, \; \mbox{ and} \;dm(\xi) =
\displaystyle\frac{|{\cal F}_D(f)(\xi)|^2}{||f||_{k,2}^2}\omega_k(\xi)d\xi,$$
and using the fact that $\lim_{n \to +\infty} (\frac{c_k
||f||_{k,2} }{2^{\gamma+\frac{d}{2}}})^{\frac{1}{n}} = 1$, we obtain
\begin{equation}\label{l7}
\lim_{n \to \infty}||f_n||_{k,2}^{\frac{1}{n}} = \sup_{\xi \in supp {\cal F}_D(f)}\exp(-||\xi||^2) = \exp( - \lambda_{{\cal F}_D(f)}^2).
\end{equation}
Which is the relation (\ref{llll}).\\

\hspace*{5mm} A function $f \in L^2_{k}(I\!\!R^d)$ is the Dunkl transform of a function vanishing in a neighborhood of the origin,
if and only if, $\lambda_{{\cal F}_D(f)} > 0$, or equivalently, if and only if the limit (\ref{llll}) is less than $1$. Thus we have proved the following result.
\begin{Cor}\hspace*{-2mm}.\label{har}
The condition
\begin{equation}\label{l8}
\lim_{n \to \infty} ||f_n||_{k,2}^{\frac{1}{n}} < 1,
\end{equation}
is necessary and sufficient for a function $f \in L^2_{k}(I\!\!R^d)$
to have its Dunkl transform vanishing in a neighborhood of the origin.

\end{Cor}
\noindent{\bf{Remark}}\\ \hspace*{5mm} From Theorem 3.3 and Corollary \ref{har} it follows that the support of the Dunkl transform of a function in $L^2_{k}(I\!\!R^d)$ is in the annulus
$\lambda_{{\cal F}_D(f)} \leq ||\xi|| \leq R_{{\cal F}_D(f)}$, if and only if,
\begin{equation}\label{l9}
\lambda_{{\cal F}_D(f)} \leq \lim_{n \to
\infty}\sqrt{-\frac{1}{n}\ln ||f_n||_{k,2}}\leq \lim_{n \to
\infty} ||\triangle_k^n f||_{k,2}^{{\frac{1}{2n}}} \leq R_{{\cal F}_D(f)}.
\end{equation}
\begin{Th}\hspace*{-2mm}. For any function $f \in {\cal S}(I\!\!R^d)$ the following relation holds
\begin{equation}\label{pknfr}
\lim_{n \to \infty} ||\displaystyle\sum_{m = 0}^{\infty}
\frac{(n\triangle_k)^m\, f }{m!}||_{k,p}^{\frac{1}{n}} = \exp( -
\lambda_{{\cal F}_D(f)}^2), \; 1 \leq p \leq \infty.
\end{equation}
In particular, a function $f \in {\cal S}(I\!\!R^d)$ is the Dunkl transform of a function in ${\cal S}(I\!\!R^d)$ vanishing in the ball
$B(o,r)$ of center $o$ and radius $r$, if and only if we have
\begin{equation}\label{pknfr45}
\lim_{n \to \infty} ||\displaystyle\sum_{m = 0}^{\infty}
\frac{(n\triangle_k)^m\, f }{m!}||_{k,p}^{\frac{1}{n}}\leq
\exp(-r^2), \; 1 \leq p \leq \infty.
\end{equation}
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} A similar proof to that of Theorem 4.1, gives the result.
\section{Dunkl transform of functions, vanishing outside a symmetric body }
\hspace*{5mm} A subset $K$ of $I\!\!R^d$ is called a symmetric body if $-x \in K$ for all $x \in K$. The set $K^* := \{y \in I\!\!R^d, \;
\langle x,y\rangle \leq 1 \; for \, all \, x \in K \}$ is called the polar set of $K$.
We now state another real Paley-Wiener theorem.
\begin{Th}\hspace*{-2mm}.
A function $f \in {\cal E}(I\!\!R^d)$ is the Dunkl transform of a function in $L^2_{k}(I\!\!R^d)$ vanishing outside a symmetric body
$K$, if and only if, $T^\mu f$ belongs to $L^2_{k}(I\!\!R^d)$ for all
$\mu = (\mu_1,...,\mu_d) \in I\!\!N^d$, and for all $n \in I\!\!N$ we have
\begin{equation}\label{l10}
\sup_{a \in K^*}||(\langle a,T\rangle)^n f||_{k,2} \leq
||f||_{k,2},
\end{equation}
where $T = (T_1,...,T_d)$.
\end{Th}
\noindent{\bf{Proof}}\\ \hspace*{5mm} Let $f \in {\cal E}(I\!\!R^d)$
and assume $f \neq 0$; otherwise the result is clear. We suppose that
$ {\cal F}_D(f)$, which belongs to $ L^2_{k}(I\!\!R^d)$, vanishes outside a symmetric body $K$. Then $f$ is infinitely differentiable and belongs to $L^2_{k}(I\!\!R^d)$ together with $T^\mu f$ for all
$\mu = (\mu_1,...,\mu_d) \in I\!\!N^d$. As the Dunkl transform of
$(i \langle a,\xi\rangle)^n {\cal F}_D(f)(-\xi)$ is $(\langle a,T\rangle)^n f$, by applying Theorem 2.3 we obtain
\begin{equation}\label{l11}
||(\langle a,T\rangle)^n f||_{k,2} =
\frac{c_k}{2^{\gamma+\frac{d}{2}}}||(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)||_{k,2}.
\end{equation}
As $K$ satisfies the symmetric property, we deduce that $|\langle a,\xi\rangle| \leq 1$ for all $\xi \in K$ and $a \in K^*$. Hence
$$\begin{array}{lll} ||(\langle a,\xi\rangle)^n {\cal F}_D(f)(.)||_{k,2}^2 &=& \displaystyle\int_{K}|(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi \\ &\leq& \displaystyle\int_{K}|{\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi =
\frac{4^{\gamma+\frac{d}{2}}}{c_k^2} ||f||_{k,2}^2.\end{array}$$
Thus $$ \sup_{a \in K^*}||(\langle a,T\rangle)^n f||_{k,2} \leq
||f||_{k,2}.$$
\hspace*{5mm} Conversely, we assume that the inequality
(\ref{l10}) is valid for all $n \in I\!\!N$. Since $T^\mu f \in L^2_{k}(I\!\!R^d)$ for all $\mu = (\mu_1,...,\mu_d) \in I\!\!N^d$, from Proposition 2.7, Theorem 2.3 and the inequality (\ref{l10}) we obtain for all
$n \in I\!\!N$:
\begin{equation}\label{l12}
\sup_{a \in K^*}||(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)||_{k,2} = \frac{2^{\gamma+\frac{d}{2}}}{c_k}\sup_{a
\in K^*}||(\langle a,T\rangle)^n f||_{k,2} \leq
\frac{2^{\gamma+\frac{d}{2}}}{c_k}||f||_{k,2}.
\end{equation}
Let $\xi_0 \notin K$; this means that there exists $a \in K^*$ such that $\langle\xi_0,a\rangle > 1$. Then there is a neighborhood
$U_{\xi_0}$ of $\xi_0$ with the property $\langle\xi,a\rangle >
\displaystyle\frac{1+ \langle\xi_0,a\rangle }{2} > 1$, for all $\xi \in U_{\xi_0}$. Thus for all $n \in I\!\!N$:
\begin{eqnarray}\label{l13}\nonumber
\frac{2^{\gamma+\frac{d}{2}}}{c_k}||f||_{k,2} &\geq& \sup_{a \in K^*}||(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)||_{k,2} \geq
(\displaystyle\int_{U_{\xi_0}}|(\langle a,\xi\rangle)^n {\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi)^{\frac{1}{2}}\\ \\ &\geq&
(\displaystyle\frac{1+ \langle\xi_0,a\rangle }{2})^n(\displaystyle\int_{U_{\xi_0}}|{\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi)^{\frac{1}{2}}. \nonumber
\end{eqnarray}
Since $(\displaystyle\frac{1+ \langle\xi_0,a\rangle }{2})^n$ approaches
$\infty$ as $n \to \infty$, (\ref{l13}) holds only if
$$\displaystyle\int_{U_{\xi_0}}|{\cal F}_D(f)(\xi)|^2\omega_k(\xi)d\xi = 0,$$
this implies that $\xi_0$ does not belong to the support of
${\cal F}_D(f)$. Hence $supp \, {\cal F}_D(f) \subset K$, and Theorem 6.1 is proved.\vspace*{5mm}
\title{Hot Gas in Galaxy Clusters: Theory and Simulations}

\begin{abstract}
We review the theory of the formation of galaxy clusters and discuss their role as cosmological probes. We begin with the standard cosmological framework where we discuss the origin of the CDM matter power spectrum and the growth of density fluctuations in the linear regime. We then summarize the spherical top-hat model for the nonlinear growth of fluctuations from which scaling relations and halo statistics are derived.
Numerical methods for simulating gas in galaxy clusters are then overviewed with an emphasis on multiscale hydrodynamic simulations of cluster ensembles. Results of hydrodynamic AMR simulations are described which compare cluster internal and statistical properties as a function of their assumed baryonic processes. Finally, we compare various methods of measuring cluster masses using X-ray and the thermal Sunyaev-Zeldovich effect (SZE). We find that SZE offers great promise for precision measurements in raw samples of high-z clusters.
\end{abstract}

\section{Introduction}
The Sunyaev-Zeldovich Effect (SZE) detectable in galaxy clusters has emerged as a powerful new probe of the low to intermediate redshift universe (see articles by Birkinshaw {\&} Rephaeli in this volume, as well as the review by Carlstrom, et al. \cite{carlstrom02}). Within the prevailing theory of cosmological structure formation, galaxy clusters form in rare, massive peaks of the cosmic density field. Because of natural biasing, such regions get a ``head start'' on structure formation on all scales smaller than the cluster scale.
As a consequence, galaxy clusters at the present epoch contain the oldest objects in the universe in an evolutionary sense \cite{springel05}.
This makes galaxy clusters intrinsically interesting as astrophysical objects, worthy of study observationally, theoretically, and computationally.

However, much of the current interest stems from the potential use of galaxy clusters as cosmological probes. As discussed in more detail below, the space density of galaxy clusters as a function of cosmological redshift is sensitive to the RMS mass fluctuations on scales of 10$^{14-15} M_{\odot}$,
which depends on $\Omega _{m}$, the mean mass density of the universe, and to a lesser extent, $\Omega _{de}$, the dark energy density of the universe. Attempts to deduce $\Omega _{m}$ based on X-ray surveys have met with some success \cite{Rosati02}, but they have been hampered by the fact that at these wavebands cluster samples become sparse at z$>$1 owing to their low surface brightness. Because the SZE is intrinsically redshift independent, one has the possibility of detecting clusters over a wide range of redshifts. Blind surveys with sufficient sensitivity can in principle detect clusters from z=0 to their formation redshift $z \leq 1.5$ \cite{carlstrom02}, paving the way for more precise cosmological parameter measurements. Follow-up pointed observations of a large sample of galaxy clusters over a range of redshifts would enable a detailed study of their formation and evolution. Such studies would confirm or modify our theory of structure formation, improve our understanding of galaxy evolution, and reveal a great deal about the complex physical processes operating in the intracluster medium (ICM).

This paper summarizes four lectures the author delivered at the Varenna Summer School entitled ``Background Microwave Radiation and Intracluster Cosmology'',
held July 2004 in Varenna, Italy. Originally, the organizers asked me to deliver three lectures covering numerical simulations of galaxy clusters, as well as to review the basics of cosmological structure formation, of which galaxy clusters are just one aspect. The first lecture of the school was to have been given by Dr. Rocky Kolb on the cosmological standard model and the linear growth of density perturbations. When he was unable to attend the school, that responsibility fell to me, increasing my task to four lectures.
Fortunately, Dr. Kolb's lecture slides were made available to me, which I used verbatim. The following Section 2 follows closely the content and organization of Dr. Kolb's lecture notes, while Sections 3-5 are my own.
Section 3 reviews key concepts and results from structure formation theory that provide the vocabulary and framework for interpreting observations and simulations of galaxy clusters. Section 4 discusses the technical challenges associated with simulating gas in galaxy clusters and reviews the numerical methods we have employed. Section 5 presents results of numerical simulations of statistical ensembles of galaxy clusters whose goal is to understand how observables such as X-ray luminosity, emission-weighted temperature, and SZE depend on cluster mass and baryonic physics.

In line with the character of the summer school, I have attempted to be pedagogical, emphasizing the key concepts and results that a student needs to know if he/she wants to understand the current literature or do research in this area. Literature citations are kept to a minimum, except for textbooks, reviews, and research papers that I found to be particularly helpful in preparing this article.

\section {Cosmological framework and perturbation growth in the linear regime}

Our modern theory of the structure and evolution of the universe, along with the observational data which support it, is admirably presented in a recent textbook by Dodelson \cite{dodelson03}.
Remarkable observational progress has been made in the past two decades which has strengthened our confidence in the correctness of the hot, relativistic, expanding universe model (Big Bang), has measured the universe's present mass-energy contents and kinematics, and lent strong support to the notion of a very early, inflationary phase. Moreover,
observations of high redshift supernovae unexpectedly have revealed that the cosmic expansion is accelerating at the present time, implying the existence of a pervasive, dark energy field with negative pressure \cite{Perlmutter03}. This surprising discovery has enlivened observational efforts to accurately measure the cosmological parameters over as large a fraction of the age of the universe as possible, especially over the redshift interval 0
$<$ z $<$ 1.5 which, according to current estimates, spans the deceleration-acceleration transition. These efforts include large surveys of galaxy large scale structure, galaxy clusters, weak lensing, the Lyman alpha forest, and high redshift supernovae, all of which span the relevant redshift range. Except for the supernovae, all other techniques rely on measurements of cosmological structure in order to deduce cosmological parameters.

\subsection{Cosmological standard model}
The dynamics of the expanding universe is described by the two Friedmann equations derived from Einstein's theory of general relativity under the assumption of homogeneity and isotropy. The expansion rate at time $t$ is given by
\begin{equation}\label{eq1}
H^2(t)\equiv \left( {\frac{\dot {a}}{a}} \right)^2=\frac{8\pi G}{3}\sum\limits_i {\rho _i } -\frac{k}{a^2}+\frac{\Lambda }{3}
\end{equation}
where $H(t)$ is the Hubble parameter and $a(t)$ is the FRW scale factor at time
$t$. The first term on the RHS is proportional to the sum over all energy densities in the universe $\rho _{i }$ including baryons, photons, neutrinos, dark matter and dark energy. We have explicitly pulled the dark energy term out of the sum and placed it in the third term assuming it is a constant (the cosmological constant). The second term is the curvature term, where
$k=0,\pm 1$ for zero, positive, negative curvature, respectively. Equation (\ref{eq1})
can be cast in a form useful for numerical integration if we introduce
$\Omega $ parameters:
\begin{equation}\label{eq2}
\Omega _i \equiv \frac{8\pi G}{3H^2}\rho _i ,\mbox{ }\Omega _\Lambda \equiv
\frac{8\pi G}{3H^2}\rho _\Lambda =\frac{\Lambda }{3H^2},\mbox{ }\Omega_k
\equiv \frac{-k}{(aH)^2}
\end{equation}
Dividing equation (\ref{eq1}) by $H^2$ we get the sum rule 1=$\Omega _{m}+\Omega _{k}+\Omega _{\Lambda }$, which is true at all times,
where $\Omega _{m}$ is the sum over all $\Omega _{i}$ excluding dark energy. At the present time $H(t)=H_{0}, a=1$, and cosmological density parameters become
\begin{equation}\label{eq3}
\Omega _i (0)=\frac{8\pi G}{3H_0^2 }\rho _i (0),\mbox{ }\Omega _\Lambda
(0)=\frac{\Lambda }{3H_0^2 },\mbox{ }\Omega _k (0)=\frac{-k}{H_0^2 }
\end{equation}
Equation (\ref{eq1}) can then be manipulated into the form
\begin{equation}\label{eq4}
\dot {a}=H_0 [\Omega _m (0)(a^{-1}-1)+\Omega _\gamma (0)(a^{-2}-1)+\Omega _\Lambda (0)(a^2-1)+1]^{1/2}
\end{equation}
Here we have explicitly introduced a density parameter for the background radiation field $\Omega _{\gamma }$ and used the fact that matter and radiation densities scale as a$^{-3}$ and a$^{-4}$, respectively,
and we have used the sum rule to eliminate $\Omega _{k}$. Equation (\ref{eq4}) is equation (\ref{eq1}) expressed in terms of the \textit{current} values of the density and Hubble parameters, and makes explicit the scale factor dependence of the various contributions to the expansion rate. In particular, it is clear that the expansion rate is dominated first by radiation, then by matter, and finally by the cosmological constant.

Current measurements of the cosmological parameters by different techniques
\cite{spergel03} yield the following numbers [(0) notation suppressed]:
\[
\begin{array}{l}
h\equiv H_0 /(100km/s/Mpc)\approx 0.72 \\
\Omega _{total} \approx 1,\mbox{ }\Omega _\Lambda \approx 0.73\mbox{,
}\Omega _m =\Omega _{cdm} +\Omega _b \approx 0.27,\Omega _k \approx 0 \\
\Omega _b \approx 0.04,\mbox{ }\Omega _\nu \approx 0.005,\mbox{ }\Omega _\gamma \approx 0.00005 \\
\end{array}
\]
This set of parameters is referred to as the concordance model \cite{bops99}, and describes a spatially flat, low matter density, high dark energy density universe in which baryons, neutrinos, and photons make a negligible contribution to the large scale dynamics. Most of the matter in the universe is cold dark matter (CDM) whose dynamics is discussed below. As we will also see below, baryons and photons make an important contribution to shaping of the matter power spectrum despite their small contribution to the present-day energy budget. Understanding the evolution of baryons in nonlinear structure formation is essential to interpret X-ray and SZE observations of galaxy clusters.

The second Friedmann equation relates the second time derivative of the scale factor to the cosmic pressure $p $ and energy density\textit{ $\rho $}
\begin{equation}\label{eq5}
\frac{\ddot {a}}{a}=-\frac{4\pi G}{3}(\rho +3p),\mbox{ }\rho =\sum\limits_i
{\rho _i } =\rho _m +\rho _\gamma +\rho _\Lambda
\end{equation}
$p$ and $\rho $ are related by an equation of state $p_{i}=w_{i}\rho _{i}$, with $w_{m}$=0, $w_{\gamma }$=1/3, and $w_{\Lambda }= -1$. We thus have
\begin{equation}\label{eq6}
\frac{\ddot {a}}{a}=-\frac{4\pi G}{3}(\rho _m +2\rho _\gamma -2\rho _\Lambda
)\mbox{. }
\end{equation}
Expressed in terms of the current values for the cosmological parameters we have
\begin{equation}\label{eq7}
\frac{\ddot {a}}{a}=-\frac{1}{2}H_0^2 [\Omega _m (0)a^{-3}+2\Omega _\gamma
(0)a^{-4}-2\Omega _\Lambda (0)]\mbox{. }
\end{equation}
Evaluating equation \ref{eq7} using the concordance parameters, we see the universe is currently accelerating $\ddot {a}\approx 0.6H_0^2 \mbox{ }$ .
Assuming the dark energy density is a constant, the acceleration began when
\begin{equation}\label{eq8}
a\equiv \frac{1}{1+z}=\left( {\frac{\Omega _m (0)}{2\Omega _\Lambda (0)}}
\right)^{\mbox{1/3}}\mbox{ }\approx 0.57
\end{equation}
or $z\sim 0.75$.

\subsection{The Linear power spectrum}

Cosmic structure results from the amplification of primordial density fluctuations by gravitational instability. The power spectrum of matter density fluctuations has now been measured with considerable accuracy across roughly four decades in scale. Figure \ref{fig1} shows the latest results,
taken from reference
\cite{tegmark03}. Combined in this figure are measurements using cosmic microwave background (CMB) anisotropies, galaxy large scale structure, weak lensing of galaxy shapes, and the Lyman alpha forest, in order of decreasing comoving wavelength. In addition, there is a single data point for galaxy clusters, whose current space density measures the amplitude of the power spectrum on 8 h$^{-1}$ Mpc scales \cite{wef93}.
Superimposed on the data is the predicted $\Lambda $CDM linear power spectrum at z=0 for the concordance model parameters. As one can see, the fit is quite good. In actuality, the concordance model parameters are determined by fitting the data. A rather complex statistical machinery underlies the determination of cosmological parameters, and is discussed in Dodelson (2003, Ch. 11). The fact that modern CMB and LSS data agree over a substantial region of overlap gives us confidence in the correctness of the concordance model. In this section, we define the power spectrum mathematically, and review the basic physics which determines its shape.
Readers wishing a more in depth treatment are referred to references
\cite{dodelson03,kolbturner90}.

\begin{figure}[htbp]
\centerline{\includegraphics[width=4.15in,height=3.8in]{fig1small.eps}}
\caption{Linear matter power spectrum P(k) versus wavenumber extrapolated to z=0, from various measurements of cosmological structure. The best fit
$\Lambda $CDM model is shown as a solid line. From \cite{tegmark03}.}
\label{fig1}
\end{figure}

At any epoch $t$ (or $a$ or $z)$ express the matter density in the universe in terms of a mean density and a local fluctuation:
\begin{equation}\label{eq9}
\rho (\vec {x})=\bar {\rho }(1+\delta (\vec {x}))
\end{equation}
where $\delta (\vec {x})$ is the density contrast. Expand $\delta (\vec
{x})$ in Fourier modes:
\begin{equation}\label{eq10}
\delta (\vec {x})\equiv \frac{\rho (\vec {x})-\bar {\rho }}{\bar {\rho
}}=\int {\delta (\vec {k})\exp (-i\vec {k}\cdot \vec {x})d^3} k.
\end{equation}
The autocorrelation function of $\delta (\vec {x})$ defines the power spectrum through the relations
\begin{equation}\label{eq11}
\left\langle {\delta (\vec {x})\delta (\vec {x})} \right\rangle
=\int\limits_0^\infty {\frac{dk}{k}} \frac{k^3\left| {\delta ^2(\vec {k})}
\right|}{2\pi ^2}=\int\limits_0^\infty {\frac{dk}{k}} \frac{k^3P(k)}{2\pi
^2}=\int\limits_0^\infty {\frac{dk}{k}} \Delta ^2(k)
\end{equation}
where we have the definitions
\begin{equation}\label{eq12}
P(k)\equiv \left| {\delta ^2(\vec {k})} \right|,\mbox{ and }\Delta
^2(k)\equiv \frac{k^3P(k)}{2\pi ^2}.
\end{equation}
The quantity $\Delta ^2(k)$ is called the dimensionless power spectrum and is an important function in the theory of structure formation. $\Delta
^2(k)$ measures the contribution of perturbations per unit logarithmic interval at wavenumber $k$ to the variance in the matter density fluctuations.
The $\Lambda $CDM power spectrum asymptotes to $P(k)\sim k^{1}$ for small
$k$, and $P(k)\sim k^{-3}$ for large $k$, with a peak at $k^{\star}\sim 2\times 10^{-2}$ h Mpc$^{-1}$ corresponding to $\lambda^{\star}\sim $350 h$^{-1}$ Mpc.
$\Delta ^2(k)$ is thus asymptotically flat at high $k$, but drops off as
$k^{4}$ at small $k$. We therefore see that most of the variance in the cosmic density field in the universe at the present epoch is on scales $\lambda < \lambda^{\star}.$

\begin{figure}[htbp]
\centerline{\includegraphics[width=4in,height=3in]{fig2.eps}}
\caption{The tale of two fluctuations. A fluctuation which is superhorizon scale at matter-radiation equality grows always, while a fluctuation which enters the horizon during the radiation dominated era stops growing in amplitude until the matter dominated era begins.}
\label{fig2}
\end{figure}

What is the origin of the power spectrum shape? Here we review the basic ideas.
Within the inflationary paradigm, it is believed that quantum mechanical
(QM) fluctuations in the very early universe were stretched to macroscopic scales by the large expansion factor the universe underwent during inflation. Since QM fluctuations are random, the primordial density perturbations should be well described as a Gaussian random field.
Measurements of the Gaussianity of the CMB anisotropies \cite{komatsu03} have confirmed this. The primordial power spectrum is parameterized as a power law $P_p
(k)\propto k^n$, with $n=1$ corresponding to scale-invariant spectrum proposed by Harrison and Zeldovich on the grounds that any other value would imply a preferred mass scale for fluctuations entering the Hubble horizon.
Large angular scale CMB anisotropies measure the primordial power spectrum directly since they are superhorizon scale. Observations with the WMAP satellite are consistent with $n=1$.

To understand the origin of the spectrum, we need to understand how the amplitude of a fluctuation of fixed comoving wavelength $\lambda$ grows with time. Regardless of its wavelength, the fluctuation will pass through the Hubble horizon as illustrated in Fig. \ref{fig2}. This is because the Hubble radius grows linearly with time, while the proper wavelength a$\lambda $
grows more slowly with time. It is easy to show from Eq. \ref{eq1} that in the radiation-dominated era, $a\sim t^{1/2}$, and in the matter-dominated era
(prior to the onset of cosmic acceleration) $a\sim t^{2/3}$. Thus, inevitably,
a fluctuation will transition from superhorizon to subhorizon scale. We are interested in how the amplitude of the fluctuation evolves during these two phases. Here we merely state the results of perturbation theory (e.g.,
Dodelson 2003, Ch. 7).

\begin{figure}[htbp]
\begin{tabular}{c}
\centerline{\includegraphics[width=4in,height=2in]{fig3a.eps}} \\
\centerline{\includegraphics[width=4.4in,height=2.2in]{fig3.eps}}
\end{tabular}
\caption{ a) Evolution of the primordial power spectrum on superhorizon scales during the radiation dominated era. b) Scale-free spectrum produces a constant contribution to the density variance per logarithmic wavenumber interval entering the Hubble horizon (no preferred scale) c) resulting matter power spectrum, super- and sub-horizon. Figures courtesy Rocky Kolb.}
\label{fig3}
\end{figure}

\subsection{Growth of fluctuations in the linear regime }

To calculate the growth of superhorizon scale fluctuations requires general relativistic perturbation theory, while subhorizon scale perturbations can be analyzed using a Newtonian Jeans analysis. We are interested in scalar density perturbations, because these couple to the stress tensor of the matter-radiation field. Vector perturbations (e.g., fluid turbulence)
are not sourced by the stress-tensor, and decay rapidly due to cosmic expansion. Tensor perturbations are gravity waves, and also do not couple to the stress-tensor. A detailed analysis for the scalar perturbations yields the following results. In the \underline {radiation dominated era},
\[
\begin{array}{l}
\delta _+ (t)=\delta _+ (t_i )(t/t_i )\mbox{ superhorizon scales} \\
\delta _+ (t)=constant \mbox{ ~~~subhorizon scales} \\
\end{array}
\]
while in the \underline {matter dominated era},
\[
\begin{array}{l}
\delta _+ (t)=\delta _+ (t_i )(t/t_i )^{2/3}\mbox{ superhorizon scales} \\
\delta _+ (t)=\delta _+ (t_i )(t/t_i )^{2/3}\mbox{ subhorizon scales} \\
\end{array}
\]
This is summarized in Fig. \ref{fig2}, where we consider two fluctuations of different comoving wavelengths, which we will call large and small. The large wavelength perturbation remains superhorizon through matter-radiation equality (MRE), and enters the horizon in the matter dominated era. Its amplitude will grow as $t$ in the radiation dominated era, and as $t^{2/3}$ in the matter dominated era. It will continue to grow as $t^{2/3}$ after it becomes subhorizon scale. The small wavelength perturbation becomes subhorizon before MRE. Its amplitude will grow as $t$ while it is superhorizon scale, remain constant while it is subhorizon during the radiation dominated era, and then grow as $t^{2/3}$ during the matter-dominated era.

Armed with these results, we can understand what is meant by a scale-free primordial power spectrum (the Harrison-Zeldovich power spectrum.) We are concerned with perturbation growth in the very early universe during the radiation dominated era. Superhorizon scale perturbation amplitudes grow as
$t$, and then cease to grow after they have passed through the Hubble horizon.
We can define a Hubble wave number $k_H \equiv 2\pi /R_H \propto t^{-1}.$ Fig. 3a shows the primordial power spectrum at three instants in time for k$<$k$_{H}$. We see that the fluctuation amplitude at k=k$_{H}$(t)
depends on primordial power spectrum slope n. The scale-free spectrum is the value of n such that $\Delta ^2(k_{H}(t))$=constant for k$>$k$_{H}$. A simple analysis shows that this implies n=1. Since $\Delta ^2(k)\propto k^3P(k)$, we then have
\[
\begin{array}{l}
P(k)\propto k^1,\mbox{ }k\le k_H \\
P(k)\propto k^{-3},\mbox{ }k>k_H \\
\end{array}
\]

In actuality, the power spectrum has a smooth maximum, rather than a peak as shown in Fig. 3c. This smoothing is caused by the different rates of growth before and after matter-radiation equality.
The transition from radiation to matter-dominated is not instantaneous. Rather, the expansion rate of the universe changes smoothly through equality, as given by Eq. 1, and consequently so do the temporal growth rates. The position of the peak of the power spectrum is sensitive to when the universe reached matter-radiation equality, and hence is a probe of $\Omega _\gamma /\Omega _m $.

Once a fluctuation becomes sub-horizon, dissipative processes modify the shape of the power spectrum in a scale-dependent way. Collisionless matter will freely stream out of overdense regions and smooth out the inhomogeneities. The faster the particle, the larger its free streaming length. Particles which are relativistic at MRE, such as light neutrinos,
are called hot dark matter (HDM). They have a large free-streaming length,
and consequently damp the power spectrum over a large range of k. Weakly Interacting Massive Particles (WIMPs) which are nonrelativistic at MRE, are called cold dark matter (CDM), and modify the power spectrum very little
(Fig. \ref{fig4}).
Baryons are tightly coupled to the radiation field by electron scattering prior to recombination. During recombination, the photon mean-free path becomes large. As photons stream out of dense regions, they drag baryons along, erasing density fluctuations on small scales. This process is called Silk damping, and results in damped oscillations of the baryon-photon fluid once they become subhorizon scale. The magnitude of this effect is sensitive to the ratio of baryons to collisionless matter, as shown in Fig.
\ref{fig4}.

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=1.7in]{fig4.eps}
\includegraphics[width=2.5in,height=1.7in]{fig5.eps}
\caption{Effect of dissipative processes on the evolved power spectrum. Left: Effect of collisionless damping (free streaming) in the dark matter. Right: Effect of collisional damping (Silk damping) in the matter-radiation fluid. Figures courtesy Rocky Kolb.}
\label{fig4}
\end{figure}

\section {Analytic models for nonlinear growth, virial scaling \\
relations, and halo statistics}

Here we introduce a few concepts and analytic results from the theory of structure formation which underlie the use of galaxy clusters as cosmological probes. These provide us with the vocabulary which pervades the literature on analytic and numerical models of galaxy cluster evolution. Material in this section has been derived from three primary sources: Padmanabhan (1993)
\cite{pad93} for the spherical top-hat model for nonlinear collapse, Dodelson (2003)
\cite{dodelson03} for Press-Schechter theory, and Bryan {\&} Norman (1998) \cite{BN98}
for virial scaling relations.

\subsection{Nonlinearity defined}

In the linear regime, both super- and sub-horizon scale perturbations grow as $t^{2/3}$ in the matter-dominated era. This means that after recombination,
the linear power spectrum retains its shape while its amplitude grows as
$t^{4/3}$ before the onset of cosmic acceleration. When $\Delta ^2(k)$ for a given k approaches unity, linear theory no longer applies, and some other method must be used to determine the fluctuation's growth. In general,
numerical simulations are required to model the nonlinear phase of growth because in the nonlinear regime, the modes do not grow independently.
Mode-mode coupling modifies both the shape and amplitude of the power spectrum over the range of wavenumbers that have gone nonlinear.

At any given time, there is a critical wavenumber which we shall call the nonlinear wavenumber k$_{nl}$ which determines which portion of the spectrum has evolved into the nonlinear regime. Modes with k$<$k$_{nl}$ are said to be linear, while those for which k$>$ k$_{nl}$ are nonlinear.
Conventionally, one defines the nonlinear wavenumber such that $\Delta
(k_{nl} ,z)=1.$ From this one can derive a nonlinear mass scale $M_{nl}
(z)=\frac{4\pi }{3}\bar {\rho }(z)\left( {\frac{2\pi }{k_{nl} }} \right)^3$.
A more useful and rigorous definition of the nonlinear mass scale comes from evaluating the amplitude of mass fluctuations within spheres of radius R at epoch z. The enclosed mass is $M=\frac{4\pi }{3}\bar {\rho }(z)R^3.$ The mean square mass fluctuation (variance) is
\begin{equation}\label{eq17}
\left\langle {(\delta M/M)^2} \right\rangle \equiv \sigma ^2(M)=\int
{d^3kW_T^2 (kR)P(k,z),}
\end{equation}
where W is the Fourier transform of the top-hat window function
\begin{equation}\label{eq18}
\begin{array}{l}
\mbox{W(}{\rm {\bf x}}\mbox{)}=\left\{ {{\begin{array}{*{20}c}
{3/4\pi R^3,\mbox{ }\left| {\rm {\bf x}} \right|<R} \hfill \\
{0,\mbox{ }\left| {\rm {\bf x}} \right|\ge R} \hfill \\
\end{array} }} \right. \\
\to W_T (kR)=3\left[ {\sin (kR)/kR-\cos (kR)} \right]/(kR)^2. \\
\end{array}
\end{equation}
If we approximate P(k) locally with a power-law $P(k,z)=D^2(z)k^m$, where D is the linear growth factor, then $\sigma ^2(M)\propto D^2R^{-(3+m)}\propto D^2M^{-(3+m)/3}.$ From this we see that the RMS fluctuations are a decreasing function of M. At very small mass scales, m$\rightarrow -3$, and the fluctuations asymptote to a constant value. We now define the nonlinear mass scale by setting $\sigma $(M$_{nl})$=1. We get that (\cite{white94})
\begin{equation}\label{eq19}
M_{nl} (z)\propto D(z)^{6/(3+m)}\mbox{ (}\propto
\mbox{(1}+\mbox{z)}^{\mbox{-6/(3}+\mbox{m)}}\mbox{ for EdS).}
\end{equation}
For $m > -3$, the smallest mass scales become nonlinear first. This is the origin of hierarchical (``bottom-up'') structure formation.

\subsection{Spherical Top-Hat Model}

\begin{figure}[htbp]
\centerline{\includegraphics[width=3in,height=2in]{fig6.eps}}
\caption{Evolution of a top-hat perturbation in an EdS universe. Depending on E, the first integral of motion, the fluctuation collapses (E$<$0),
continues to expand (E$>$0), or asymptotically reaches its maximum radius
(E=0). Virialization occurs when the fluctuation has collapsed to half its turnaround radius.}
\label{fig5}
\end{figure}

We now ask what happens when a spherical volume of mass M and radius R exceeds the nonlinear mass scale. The simplest analytic model of the nonlinear evolution of a discrete perturbation is called the spherical top-hat model. In it, one imagines a spherical perturbation of radius $R$
and some constant overdensity $\bar {\delta }=3M/4\pi R^3$ in an Einstein-de Sitter
(EdS) universe. By Birkhoff's theorem the equation of motion for R is
\begin{equation}\label{eq20}
\frac{d^2R}{dt^2}=-\frac{GM}{R^2}=-\frac{4\pi G}{3}\bar {\rho }(1+\bar
{\delta })R
\end{equation}
whereas the background universe expands according to Eq. \ref{eq6}
\begin{equation}\label{eq21}
\frac{d^2a}{dt^2}=-\frac{4\pi G}{3}\bar {\rho }a.
\end{equation}

Comparing these two equations, we see that the perturbation evolves like a universe of a different mean density, but with the same initial expansion rate. Integrating Eq. \ref{eq20} once with respect to time gives us the first integral of motion:
\begin{equation}\label{eq22}
\frac{1}{2}\left( {\frac{dR}{dt}} \right)^2-\frac{GM}{R}=E,
\end{equation}
where E is the total energy of the perturbation. If E$<$0, the perturbation is bound, and obeys
\begin{equation}\label{eq23}
\frac{R}{R_m}=\frac{(1-\cos \theta)}{2}, ~~~\frac{t}{t_m}=\frac{(\theta-\sin\theta)}{\pi}
\end{equation}
where $R_m$ and $t_m$ are the radius and time of ``turnaround''. At turnaround
(as $\theta \rightarrow \pi$), the fluctuation reaches its maximum proper radius (see Fig. \ref{fig5}). As
$t\rightarrow 2t_m, R\rightarrow 0$, and we say the fluctuation has collapsed.

A detailed analysis of the evolution of the top-hat perturbation is given in Padmanabhan (1993, Ch. 8) for general $\Omega_m$.
Here we merely quote results for an EdS universe.
The mean \textit{linear} overdensity at turnaround; i.e., the value one would predict from the linear growth formula $\delta \sim t^{2/3}$, is 1.063. The actual overdensity at turnaround using the nonlinear model is 4.6. This illustrates that nonlinear effects set in well before the amplitude of a linear fluctuation reaches unity. As R$\rightarrow $0, the nonlinear overdensity becomes infinite.
However, the linear overdensity at $t=2t_m$ is only 1.686. As the fluctuation collapses, other physical processes (pressure, shocks, violent relaxation)
become important which establish a gravitationally bound object in virial equilibrium before infinite density is reached. Within the framework of the spherical top-hat model, we say virialization has occurred when the kinetic and gravitational energies satisfy virial equilibrium: $\left| U \right|=2K.$ It is easy to show from conservation of energy that this occurs when $R=R_m/2$; in other words, when the fluctuation has collapsed to half its turnaround radius. The nonlinear overdensity at virialization $\Delta _c$
is not infinite since the radius is finite.
For an EdS universe, $\Delta _c =18\pi ^2\approx 180$. Fitting formulae for non-EdS models are provided in the next section.

\subsection{Virial Scaling Relations}
The spherical top-hat model can be scaled to perturbations of arbitrary mass. Using virial equilibrium arguments, we can predict various physical properties of the virialized object. The ones that interest us most are those that relate to the observable properties of gas in galaxy clusters,
such as temperature, X-ray luminosity, and SZ intensity change. Kaiser \cite{kaiser86}
first derived virial scaling relations for clusters in an EdS universe. Here we generalize the derivation to non-EdS models of interest. In order to compute these scaling laws, we must assume some model for the distribution of matter as a function of radius within the virialized object. A top-hat distribution with a density $\rho =\Delta _c \bar {\rho }(z)$ is not useful because it is not in mechanical equilibrium. More appropriate is the isothermal,
self-gravitating, equilibrium sphere for the collisionless matter, whose density profile is related to the one-dimensional velocity dispersion
\cite{bt87}
\begin{equation}\label{eq24}
\rho (r)=\frac{\sigma ^2}{2\pi Gr^2}.
\end{equation}
If we define the virial radius r$_{vir}$ to be the radius of a spherical volume within which the mean density is $\Delta _{c}$ times the critical density at that redshift ($M=4\pi r_{vir}^3 \rho _{crit} \Delta _c /3)$, then there is a relation between the virial mass M and $\sigma $:
\begin{equation}
\label{eq25}
\sigma =M^{1/3}[H^2(z)\Delta _c G^2/16]^{1/6}\approx 476f_\sigma \left(
{\frac{M}{10^{15}M_\odot }} \right)^{1/3}(h^2\Delta _c E^2)^{1/6}\mbox{ km s}^{\mbox{-1}}.
\end{equation}
Here we have introduced a normalization factor $f_{\sigma}$ which will be used to match the normalization from simulations. The redshift dependent Hubble parameter can be written as $H(z)=100hE(z)\mbox{ km s}^{-1}\mbox{ Mpc}^{-1}$ with the function $E^2(z)=\Omega _m (1+z)^3+\Omega _k (1+z)^2+\Omega _\Lambda $,
where the $\Omega$'s have been previously defined.

The value of $\Delta_c$ is taken from the spherical top-hat model, and is 18$\pi
^{2}$ for the critical EdS model, but has a dependence on cosmology through the parameter $\Omega (z)=\Omega _m (1+z)^3/E^2(z).$ Bryan and Norman
(1998) provided fitting formulae for $\Delta_c$ for both open universe models and flat, lambda-dominated models
\begin{equation}\label{eq26}
\Delta _c =18\pi ^2+82x-39x^2\mbox{ for }\Omega _k =0,\mbox{ }\Delta _c
=18\pi ^2+60x-32x^2\mbox{ for }\Omega _\Lambda =0
\end{equation}
where x=$\Omega $(z)-1.

If the distribution of the baryonic gas is also isothermal, we can define a ratio of the ``temperature'' of the collisionless material ($T_\sigma =\mu m_p \sigma ^2/k)$ to the gas temperature:
\begin{equation}
\label{eq27}
\beta =\frac{\mu m_p \sigma ^2}{kT}
\end{equation}
Given equations (\ref{eq25}) and (\ref{eq27}), the relation between temperature and mass is then
\begin{equation}
\label{eq28}
kT=\frac{GM^{2/3}\mu m_p }{2\beta }\left[ {\frac{H^2(z)\Delta _c }{2G}}
\right]^{1/3}\approx 1.39f_T \left( {\frac{M}{10^{15}M_\odot }}
\right)^{2/3}(h^2\Delta _c E^2)^{1/3}\mbox{ keV,}
\end{equation}
where in the last expression we have added the normalization factor f$_{T}$
and set $\beta $=1.

The scaling behavior for the object's X-ray luminosity is easily computed by assuming bolometric bremsstrahlung emission and ignoring the temperature dependence of the Gaunt factor: $L_{bol} \propto
\int {\rho ^2} T^{1/2}dV\propto M_b \rho T^{1/2}$, where M$_{b}$ is the baryonic mass of the cluster. This is infinite for an isothermal density distribution, since $\rho $ is singular. Observationally and computationally, it is found that the baryon distribution rolls over to a constant density core at small radius. A procedure is described in Bryan and Norman (1998) which yields a finite luminosity:
\begin{equation}
\label{eq29}
L_{bol} =1.3\times 10^{45}\left( {\frac{M}{10^{15}M_\odot }}
\right)^{4/3}(h^2\Delta _c E^2)^{7/6}\mbox{ }\left( {\frac{\Omega _b
}{\Omega _m }} \right)^2\mbox{ erg s}^{-1}.
\end{equation}
Eliminating M in favor of T in Eq. \ref{eq29} we get
\begin{equation}
\label{eq30}
L_{bol} =6.8\times 10^{44}\left( {\frac{kT/f_T }{1.0\mbox{ keV}}}
\right)^2(h^2\Delta _c E^2)^{1/2}\mbox{ }\left( {\frac{\Omega _b }{\Omega _m
}} \right)^2\mbox{ erg s}^{-1}.
\end{equation}
The scaling of the SZ ``luminosity'' is likewise easily computed. If we define L$_{SZ}$ as the integrated SZ intensity change: $L_{SZ} =\int {dA\int {n_e
\sigma _T } } \left( {\frac{kT}{m_e c^2}} \right)dl\propto M_b T$, then
\begin{equation}\label{eq30a}
L_{SZ} =\frac{GM^{5/3}\sigma _T }{2\beta m_e c^2}\left[ {\frac{H^2(z)\Delta _c }{2G}} \right]^{1/3}\left( {\frac{\Omega _b }{\Omega _m }} \right).
\end{equation}
We note that cosmology enters these relations only with the combination of parameters $h^2\Delta _c E^2$, which comes from the relation between the cluster's mass and the mean density of the universe at redshift z. The redshift variation comes mostly from E(z), which is equal to (1+z)$^{3/2}$
for an EdS universe.

\subsection{Statistics of hierarchical clustering: Press-Schechter theory}
Now that we have a simple model for the nonlinear evolution of a spherical density fluctuation and its observable properties as a function of its virial mass, we would like to estimate the number of virialized objects of mass M as a function of redshift given the matter power spectrum. This is the key to using surveys of galaxy clusters as cosmological probes. While large scale numerical simulations can and have been used for this purpose
(see below), we review a powerful analytic approach by Press and Schechter
\cite{ps74} which turns out to be remarkably close to numerical results. The basic idea is to imagine smoothing the cosmological density field at any epoch z on a scale R such that the mass scale of virialized objects of interest satisfies $M=\frac{4\pi }{3}\bar {\rho }(z)R^3.$ Because the density field
(both smoothed and unsmoothed) is a Gaussian random field, the probability that the mean overdensity in spheres of radius R exceeds a critical overdensity $\delta _{c}$ is
\begin{equation}\label{eq31}
p(R,z)=\frac{2}{\sqrt {2\pi } \sigma (R,z)}\int\limits_{\delta _c }^\infty
{d\delta } \exp \left( {-\frac{\delta ^2}{2\sigma ^2(R,z)}} \right)
\end{equation}
where $\sigma(R,z)$ is the RMS density variation in spheres of radius R as discussed above.
Press and Schechter suggested that this probability be identified with the fraction of particles which are part of a nonlinear lump with mass exceeding M if we take $\delta _c =1.686,$ the linear overdensity at virialization.
This assumption has been tested against numerical simulations and found to be quite good \cite{wef93}. The fraction of the volume collapsed into objects with mass between $M$ and $M+dM$ is given by
$(dp/dM)dM$. Multiply this by the average number density of such objects
$\rho _m /M$ to get the number density of collapsed objects between
$M$ and $M+dM$:
\begin{equation}\label{eq32}
dn(M,z)=-\frac{\bar {\rho }}{M}\frac{dp(M(R),z)}{dM}dM.
\end{equation}
The minus sign appears here because p is a decreasing function of M.
Carrying out the derivative using the fact that $dM/dR=3M/R,$ we get
\begin{equation}\label{eq33}
\frac{dn(M,z)}{dM}=\sqrt {\frac{2}{\pi }} \frac{\bar {\rho }\delta _c
}{3M^2\sigma }e^{-\delta _c^2 /2\sigma ^2}\left[ {-\frac{d\ln \sigma }{d\ln R}} \right].
\end{equation}
The term in square brackets is related to the logarithmic slope of the power spectrum, which on the mass scale of galaxy clusters is close to unity. Eq.
\ref{eq33} is called the \textit{halo mass function}, and it has the form of a power law multiplied by an exponential. To make this more explicit, approximate the power spectrum on scales of interest as a power law as we have done above. Substituting the scaling relations for $\sigma $ in Eq. \ref{eq33} one gets the result \cite{white94}
\begin{equation}\label{eq34}
\frac{dn}{dM}=\left( {\frac{2}{\pi }} \right)^{1/2}\frac{\bar {\rho
}}{M^2}\left( {1+\frac{m}{3}} \right)\left[ {\frac{M}{M_{nl} (z)}}
\right]^{\frac{3+m}{6}}\exp \left[ {-\left( {\frac{M}{M_{nl} (z)}}
\right)^{\frac{3+m}{3}}/2} \right].
\end{equation}
Here, $M_{nl} (z)$ is the nonlinear mass scale. To be more consistent with the spherical top-hat model, it satisfies the relation $\sigma (M_{nl}
,z)=\delta _c $; i.e., those fluctuations in the smoothed density field that have reached the linear overdensity for which the spherical top-hat model predicts virialization.

\subsection{Application to galaxy clusters}

\begin{figure}[htbp]
\includegraphics[width=5in,height=3.33in]{fig7.eps}
\caption{Top left to bottom right: a) Integrated cluster mass function for three cosmologies and two redshifts; b) like a), but for integrated temperature function; c) like a) but for integrated SZ cross section; d)
redshift distribution of the integrated probability to find a cluster exceeding $M=3.5 \times 10^{14} h^{-1} M_{\odot}$; e) redshift distribution of the integrated probability to find a cluster exceeding kT=5 keV; f) redshift distribution of the integrated probability to find a cluster exceeding Y=$10^{-3}$ h arcmin$^{2}$. From \cite{ecf96}.}
\label{fig6}
\end{figure}

Galaxy clusters correspond to rare ($\sim $3$\sigma )$ peaks in the density field. Combining the halo mass function as predicted by the PS formalism with the scaling laws derived above, we can predict the evolution of the statistical properties of X-ray and SZ clusters of galaxies. Here we show a few results taken from Eke, Cole {\&} Frenk
(1996) \cite{ecf96}.
Fig. 6a shows the evolution of the integrated mass function $n(>M)$
for several cosmologies and redshifts. One can see the power-law behavior at lower mass and the exponential cutoff at higher M. One sees strong redshift evolution of the number of massive clusters in the EdS model, but slower evolution in the open and lambda models. This is because of the saturated growth of structure in low density models. This makes number counts of massive clusters a sensitive test of the linear growth factor D(z), which depends on $\Omega_m$ and $\Omega_{\Lambda}$.
Convolving the cluster population with the scaling relations for T(M) and Y(M), one gets distribution functions for n($>$T) and n($>$Y). Here $Y=L_{SZ}/d_A^2$ is the effective SZE cross section of a cluster, where $d_A$ is its angular diameter distance.
These are shown in Figs. 6b and 6c. Another way to present the data is to convolve the mass function with the differential volume element as a function of redshift for the three models. Figs. 6d-f plot the redshift probability of detecting a cluster with M, T, and Y exceeding the fiducial values given in the figure caption. As one can see,
the profiles are sharply peaked at low redshift for the EdS model, but substantially broader and peaking at higher redshift for the low density universe models. There is, however, rather little difference between the open and lambda-dominated models as far as the probability distributions for M and Y. Things are somewhat better for T, implying that some combination of X-ray and SZE measurements will be needed for precision cosmological parameter determinations.

\section{Numerical simulations of gas in galaxy clusters}

The central task is, for a given cosmological model, to calculate the formation and evolution of a population of clusters from which synthetic X-ray and SZ catalogs can be derived. These can be used to calibrate simpler analytic models, as well as to build synthetic surveys (mock catalogs) which can be used to assess instrumental effects and survey biases. One would like to directly simulate $n(M,z), n(L_x,z), n(T,z), n(Y,z)$ from the governing equations for collisionless and collisional matter in an expanding universe.
Clearly, the quality of these statistical predictions relies on the ability to adequately resolve the internal structure and thermodynamical evolution of the ICM.

In Norman (2003) \cite{norman03}
I provided a historical review of the progress that has been made in simulating the evolution of gas in galaxy clusters motivated by X-ray observations. Since X-ray emission and the SZE are both consequences of hot plasma bound in the cluster's gravitational potential well, the requirements to faithfully simulate X-ray clusters and SZ clusters are essentially the same. Numerical progress can be characterized as a quest for higher resolution and essential baryonic physics. In this section I describe the technical challenges involved and the numerical methods that have been developed to overcome them. I then discuss the effects of assumed baryonic physics on ICM structure. Our point of reference is the non-radiative (so-called adiabatic) case, which has been the subject of an extensive code comparison
\cite{Frenk99}. I review the properties of adiabatic X-ray clusters,
and show that they fail to reproduce observed cluster scaling laws. I then show results of numerical hydrodynamic simulations incorporating radiative cooling, star formation, and galaxy feedback and their associated scaling properties.

\subsection{Dynamic range considerations}

\begin{figure}[htbp]
\includegraphics[width=3in,height=1.7in]{fig8a.eps}
\includegraphics[width=2.3in,height=1.5in]{fig8b.eps}
\caption{Left: A range of length scales of $\sim $250 separates the size of a reasonable survey volume and the virial radius of a rich cluster.
Right: Simplified structure of the ICM in a massive cluster. A range of length scales of $\sim $20-30 separates the virial radius and the core radius. }
\label{fig7}
\end{figure}

Figure 7 illustrates the dynamic range difficulties encountered with simulating a statistical ensemble of galaxy clusters, while at the same time resolving their internal structure. Massive clusters are rare at any redshift, yet these are the ones that are most sensitive to cosmology.
From the cluster mass function (Fig. 6a), in order to get adequate statistics, one deduces that one must simulate a survey volume many hundreds of megaparsecs on a side (Fig. 7a). A massive cluster has a virial radius of
$\sim $2 Mpc. It forms via the collapse of material within a comoving Lagrangian volume of $\sim $15 Mpc. However, tidal effects from a larger region (50-100 Mpc) are important for the dynamics of cluster formation. The internal structure of a cluster's ICM is shown in Fig. 7b. While clusters are not spherical, two important radii are generally used to characterize them:
the virial radius, which is the approximate location of the virialization shock wave that thermalizes infalling gas to 10-100 million K, and the core radius, within which the baryon densities plateau and the highest X-ray emissions and SZ intensity changes are measured. A typical core radius is $\sim $200 kpc. Within the core, radiative cooling and possibly other physical processes are important. Outside the core, cooling times are longer than the Hubble time, and the ICM gas is effectively adiabatic. If we wanted to achieve a spatial resolution of 1/10 of a core radius everywhere within the survey volume, we would need a spatial dynamic range of D=500 Mpc/20 kpc = 25,000.
The mass dynamic range is more severe. If we want 1 million dark matter particles within the virial radius of a $10^{15} M_{\odot}$ cluster, then we would need $N_{particle} =M_{box} /M_{particle} =\Omega _m \rho _{crit}
L^3/10^9\approx 10^{11}$ if they were uniformly distributed in the survey volume.

Two solutions to the spatial dynamic range problem have been developed: tree codes for gridless N-body methods \cite{KWH96,syw01}
and adaptive mesh refinement (AMR) for Eulerian particle-mesh/hydrodynamic methods \cite{bn97,Kravtsov97,Teyssier02,OShea04}.
Both methods increase the spatial resolution automatically in collapsing regions as described below. The solution to the mass dynamic range problem is the use of multi-mass initial conditions in which a hierarchy of particle masses is used, with many low mass particles concentrated in the region of interest. This approach has most recently been used by Springel et al. (2000)
\cite{springel00},
who simulated the formation of a galaxy cluster dark matter halo with
$N=6.9\times 10^6$ dark matter particles, resolving the dark matter halos down to the mass scale of the Fornax dwarf spheroidal galaxy. The spatial dynamic range achieved in this simulation was $R=2\times 10^5$. Such dynamic ranges have not yet been achieved in galaxy cluster simulations with gas.

\subsection{Simulating cluster formation}

Simulations of cosmological structure formation are done in a cubic domain which is comoving with the expanding universe. Matter density and velocity fluctuations are initialized at the starting redshift chosen such that all modes in the volume are still in the linear regime.
Once initialized, these fluctuations are then evolved to z=0 by solving the equations for collisionless N-body dynamics for cold dark matter, and the equations of ideal gas dynamics for the baryons in an expanding universe. Making the transformation from proper to comoving coordinates $\vec {r}=a(t)\vec {x}$, Newton's laws for the collisionless dark matter particles become
\begin{equation}
\label{eq35}
\frac{d\vec {x}_{dm} }{dt}=\vec {\upsilon }_{dm} ,\mbox{ }\frac{d\vec
{\upsilon }_{dm} }{dt}=-2\frac{\dot {a}}{a}\vec {\upsilon }_{dm}
-\frac{1}{a^2}\nabla _x \phi
\end{equation}
where $\vec {x}_{dm}$ and $\vec {\upsilon }_{dm}$ are the particle's comoving position and peculiar velocity,
respectively, and $\phi$ is the comoving gravitational potential that includes baryonic and dark matter contributions. The hydrodynamical equations for mass, momentum, and energy conservation in an expanding universe in comoving coordinates are (\cite{Anninos97})
\begin{equation}
\label{eq36}
\begin{array}{l}
\frac{\partial \rho _b }{\partial t}+\nabla \cdot (\rho _b \vec {\upsilon
}_b )+3\frac{\dot {a}}{a}\rho _b =0, \\
\frac{\partial (\rho _b \upsilon _{b,i} )}{\partial t}+\nabla \cdot [(\rho _b \upsilon _{b,i} )\vec {\upsilon }_b ]+5\frac{\dot {a}}{a}\rho _b \upsilon _{b,i} =-\frac{1}{a^2}\frac{\partial p}{\partial x_i }-\frac{\rho _b
}{a^2}\frac{\partial \phi }{\partial x_i }, \\
\frac{\partial e}{\partial t}+\nabla \cdot (e\vec {\upsilon }_b )+p\nabla
\cdot \vec {\upsilon }_b +3\frac{\dot {a}}{a}e=\Gamma -\Lambda , \\
\end{array}
\end{equation}
where $\rho_b, p$ and $e$, are the baryonic density, pressure and internal energy density defined in the proper reference frame, $\vec {\upsilon }_b $ is the comoving peculiar baryonic velocity, $a=1/(1+z)$ is the cosmological scale factor, and $\Gamma $ and $\Lambda $ are the microphysical heating and cooling rates. The baryonic and dark matter components are coupled through Poisson's equation for the gravitational potential
\begin{equation}
\label{eq37}
\nabla ^2\phi =4\pi Ga^2(\rho _b +\rho _{dm} -\bar {\rho }(z))
\end{equation}
where $\bar {\rho }(z)=3H_0^2 \Omega _m (0)/8\pi Ga^3$ is the proper background density of the universe.

The cosmological scale factor $a(t)$ is obtained by integrating the Friedmann equation (Eq. \ref{eq4}). To complete the specification of the problem we need the ideal gas equation of state $p=(\gamma -1)e$, and the gas heating and cooling rates. When simulating the ICM, the simplest approximation is to assume $\Gamma =\Lambda =0$; i.e., no heating or cooling of the gas other than by adiabatic processes and shock heating.
Such simulations are referred to as adiabatic (despite entropy-creating shock waves), and are a reasonable first approximation to real clusters because except in the cores of clusters, the radiative cooling time is longer than a Hubble time, and gravitational heating is much larger than sources of astrophysical heating. However, as discussed in the paper by Cavaliere in this volume, there is strong evidence that the gas in cores of clusters has evolved non-adiabatically. This is revealed by the entropy profiles observed in clusters \cite{Ponman99} which deviate substantially from adiabatic predictions. In the simulations presented below, we consider radiative cooling due to thermal bremsstrahlung, and mechanical heating due to galaxy feedback, details of which are described below.

\subsection{Numerical methods overview}

A great deal of literature exists on the gravitational clustering of CDM using N-body simulations. A variety of methods have been employed including the fast grid-based methods particle-mesh (PM), and particle-particle+particle-mesh (P$^{3}$M) \cite{Efstathiou81},
spatially adaptive methods such as adaptive P$^{3}$M \cite{Couchman91},
adaptive mesh refinement \cite{Kravtsov97}, tree codes
\cite{BarnesHut86,WarrenSalmon94}, and hybrid methods such as TreePM
\cite{Xu99}. Because of the large dynamic range required,
spatially adaptive methods are favored, with Tree and TreePM methods the most widely used today. When gas dynamics is included, only certain combinations of hydrodynamics algorithms and collisionless N-body algorithms are ``natural''. Dynamic range considerations have led to two principal approaches: P$^{3}$MSPH and TreeSPH, which marries a P$^3$M or tree code for the dark matter with the Lagrangian smoothed-particle-hydrodynamics (SPH)
method \cite{Evrard88,KWH96,syw01}, and adaptive mesh refinement (AMR),
which marries PM with Eulerian finite-volume gas dynamics schemes on a spatially adaptive mesh
\cite{bn97,OShea04,Teyssier02,Kravtsov03}.
Pioneering hydrodynamic simulations using non-adaptive Eulerian grids
\cite{Kang94,Bryan94,BN98}
yielded some important insights about cluster formation and statistics, but generally have inadequate resolution to resolve their internal structure in large survey volumes. In the following we concentrate on our latest results using the AMR code \textit{Enzo} \cite{OShea04}.
The reader is also referred to the paper by Borgani et al. \cite{Borgani04} which presents recent,
high-resolution results from a large TreeSPH simulation.

\textit{Enzo} is a grid-based hybrid code (hydro + N-body) which uses the block-structured AMR algorithm of Berger {\&} Colella \cite{Berger89} to improve spatial resolution in regions of large gradients, such as in gravitationally collapsing objects. The method is attractive for cosmological applications because it: (1) is spatially- and time-adaptive, (2) uses accurate and well-tested grid-based methods for solving the hydrodynamics equations, and
(3) can be well optimized and parallelized. The central idea behind AMR is to solve the evolution equations on a grid, adding finer meshes in regions that require enhanced resolution. Mesh refinement can be continued to an arbitrary level, based on criteria involving any combination of overdensity
(dark matter and/or baryon), Jeans length, cooling time, etc., enabling us to tailor the adaptivity to the problem of interest. The code solves the following physics models: collisionless dark matter and star particles,
using the particle-mesh N-body technique \cite{Hockney88}; gravity, using FFTs on the root grid and multigrid relaxation on the subgrids; cosmic expansion; gas dynamics, using the piecewise parabolic method (PPM)\cite{Collela84};
multispecies nonequilibrium ionization and H$_{2}$ chemistry, using backward Euler time differencing \cite{Anninos97}; radiative heating and cooling, using subcycled forward Euler time differencing
\cite{Anninos94}; and a parameterized star formation/ feedback recipe \cite{Cen92}. At the present time, magnetic fields and radiation transport are being installed. \textit{Enzo} is publicly available at
{\textit{http://cosmos.ucsd.edu/enzo}}.

\subsection{Structure of nonradiative clusters: the Santa Barbara test cluster}

In Frenk et al. \cite{Frenk99} 12 groups compared the results of a variety of hydrodynamic cosmological algorithms on a standard test problem. The test problem, called the Santa Barbara cluster, was to simulate the formation of a Coma-like cluster in a standard CDM cosmology ($\Omega_m=1$)
assuming the gas is nonradiative. Groups were provided with uniform initial conditions and were asked to carry out a
``best effort'' computation, and analyze their results at z=0.5 and z=0 for a set of specified outputs. These outputs included global integrated quantities,
radial profiles, and column-integrated images. The simulations varied substantially in their spatial and mass resolution owing to algorithmic and hardware limitations. Nonetheless, the comparisons brought out which predicted quantities were robust, and which were not yet converged. In Fig.
\ref{fig8} we show a few figures from Frenk et al. (1999) which highlight areas of agreement (top row) and disagreement (bottom row).

\begin{figure}[htbp]
\includegraphics[width=5in,height=3.33in]{fig9.eps}
\caption{The Santa Barbara test cluster. Top row, left to right: profiles of dark matter density, gas density, and gas pressure. Bottom row, left to right: profiles of gas temperature, gas entropy, and X-ray emissivity.
Different symbols correspond to different code results. From \cite{Frenk99}.}
\label{fig8}
\end{figure}

The top row shows profiles of dark matter density, baryon density, and pressure for the different codes. All are in quite good agreement for the
\textit{mechanical structure} of the cluster. The dark matter profile is well described by an NFW profile which has a central cusp \cite{NFW96}. The baryon density profiles show more dispersion, but all codes agree that the profile flattens at small radius, as observed. All codes agree extremely well on the gas pressure profile, which is not surprising, since mechanical equilibrium is easy to achieve for all methods even with limited resolution. This bodes well for the interpretation of SZE observations of clusters, since the Compton y parameter is proportional to the projected pressure distribution.
In section 5 we show results from a statistical ensemble of clusters which bear this out.

The bottom row shows the thermodynamic structure of the cluster, as well as the profile of X-ray emissivity. The temperature profiles show a lot of scatter within about one-third the virial radius (=2.7 Mpc).
Systematically, the SPH codes produce nearly isothermal cores, while the grid codes produce temperature profiles which continue to rise as r$\rightarrow $0. The origin of this discrepancy has not been resolved, but improved SPH formulations come closer to reproducing the AMR results
\cite{Ascasibar03}. This discrepancy is reflected in the entropy profiles. Again, agreement is good in the outer two-thirds of the cluster, but the profiles show a lot of dispersion in the inner one third. Discounting the codes with inadequate resolution, one finds the SPH codes produce an entropy profile which continues to fall as r$\rightarrow $0, while the grid codes show an entropy core, which is more consistent with observations \cite{Ponman99}.
The dispersion in the density and temperature profiles is amplified in the X-ray emissivity profile, since $\varepsilon _x \propto
\rho _b^2 T^{1/2}$. The different codes agree on the integrated X-ray luminosity of the cluster only to within a factor of 2. This is primarily because the density profile is quite sensitive to resolution in the core; any underestimate in the core density due to inadequate resolution is amplified by the density squared dependence of the emissivity. This suggests that quite high resolution is needed, as well as a good grasp on non-adiabatic processes operating in cluster cores, before simulations will be able to accurately predict X-ray luminosities.

\subsection{A numerical sample of adiabatic clusters: Universal Temperature Profile}

Three questions one can ask about the Santa Barbara cluster results are: 1)
is the cluster statistically representative, 2) do the results change substantially for a $\Lambda $CDM cosmology (the SB cluster assumed an EdS cosmology), and 3) what is the effect of additional baryonic physics on cluster structure? We address these questions here by summarizing results of
\textit{Enzo} simulations of the ICM in a sample of clusters in a concordance $\Lambda$CDM model drawn from a survey volume 256h$^{-1}$ Mpc on a side. Multimass initial conditions and AMR are used to achieve high spatial and mass resolution within the clusters. More details can be found in \cite{Loken02,Motl04,Motl05,
Hallman05}.

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=1.7in]{fig10.eps}
\includegraphics[width=2.5in,height=1.7in]{fig11.eps}
\caption{Left: Temperature profiles from a sample of adiabatic cluster simulations (from Loken et al. 2002). Black curves bound the 1$\sigma$ confidence band from Markevitch et al. (1998). Right: Effect of radiative cooling on temperature profiles, compared with adiabatic sample average (red line) and observational data for cooling flow clusters (triangles) and non-cooling flow clusters (squares).}
\label{fig9}
\end{figure}

Fig. \ref{fig9} shows spherically averaged temperature profiles for 13(3)
$\Lambda$CDM(SCDM)
simulated clusters at z=0 analyzed by Loken et al. (2002)
\cite{Loken02}. These were chosen from a total sample of 22(10) clusters because their 2D projected temperature maps were symmetric; the rejected non-symmetric clusters were in various states of merging. The smooth black curves bound the 1$\sigma $
confidence band from Markevitch et al. (1998)\cite{Markevitch98}
who analyzed temperature profiles from a sample of 17 symmetric X-ray clusters observed with ASCA.
When temperature is normalized by the integrated emission-weighted temperature and the radius by the virial radius, both the observed data and the simulated data collapse to a narrow band, suggesting a universal temperature profile (UTP) outside the core region.
The fit to the numerical data is $T\propto
(1+r/\alpha )^{-\delta }$, with $\alpha \sim $r$_{vir}$/1.5 and $\delta
\sim $1.6. The $\Lambda$CDM clusters and SCDM clusters exhibit the same profile,
with a suggestion of a slightly higher normalization for clusters in the critically closed model. The fit is in good agreement with observations over the range 0.2$<$r/r$_{vir}<$0.5, but diverges at small radius where the effects of non-adiabatic processes appear to be at play \cite{deGrandi02}.
The reality of the UTP was somewhat controversial when early results from Newton/XMM were showing large isothermal cores. However, the latest Chandra observations of 13 nearby, relaxed clusters have shown that the UTP provides an excellent description for temperature profiles outside
$r\sim 0.15r_{vir}$ \cite{Vikhlinin04}. Subsequent numerical studies by Ascasibar et al. \cite{Ascasibar03} and Borgani et al. \cite{Borgani04}
using SPH have found agreement with the AMR results of Loken et al. The general agreement of numerical and observational results suggests that the declining temperature profile is a natural consequence of gravitational heating of the ICM during the process of cluster formation.

\subsection{Effect of additional physics}

\begin{figure}[htbp]
\includegraphics[width=3in,height=3.5in]{fig12.eps}
\includegraphics[width=2.5in,height=2in]{fig13.eps}
\caption{Left: Columns show X-ray surface brightness, projected temperature, and Compton y-parameter for a $M=2\times 10^{15} M_{\odot}$ cluster assuming different baryonic physics. Field of view is 5 h$^{-1}$ Mpc. Right:
Corresponding spherically averaged radial temperature profiles.}
\label{fig10}
\end{figure}

Within r=0.15 r$_{vir}$, Vikhlinin et al. \cite{Vikhlinin04}
found large variation in temperature profiles, but in all cases the gas is cooler than the cluster mean. This suggests that radiative cooling is important in cluster cores,
and possibly other effects as well. It has been long known that $\sim 60$ percent of nearby, luminous X-ray clusters have central X-ray excesses,
which has been interpreted as evidence for the presence of cluster-wide cooling flows \cite{Fabian94}. More recently, Ponman et al. \cite{Ponman99}
have used X-ray observations to deduce the entropy profiles in galaxy groups and clusters.
They find an entropy floor in the cores of clusters indicative of extra,
non-gravitational heating, which they suggest is feedback from galaxy formation. It is easy to imagine cooling and heating both may be important to the thermodynamic evolution of ICM gas.

To explore the effects of additional physics on the ICM, we recomputed the entire sample of clusters changing the assumed baryonic physics, keeping initial conditions the same. Three additional samples of about 100 clusters each were simulated: The ``radiative cooling'' sample assumes no additional heating, but gas is allowed to cool due to X-ray line and bremsstrahlung emission in a 0.3 solar metallicity plasma. The ``star formation'' sample uses the same cooling, but additionally cold gas is turned into collisionless star particles at a rate $\dot {\rho }_{SF} =\varepsilon _{sf}
\frac{\rho _b }{\max (\tau _{cool} ,\tau _{dyn} )}$ , where $\varepsilon _{sf}$ is the star formation efficiency factor $\sim $0.1, and $\tau _{cool}$ and $\tau _{dyn}$ are the local cooling time and freefall time,
respectively. This locks up cold baryons in a non-X-ray emitting component,
which has been shown to have an important effect on the entropy profile of the remaining hot gas \cite{Bryan99,Voit00}. Finally, we have the ``star formation feedback'' sample, which is similar to the previous sample, except that newly formed stars return a fraction of their rest mass energy as thermal and mechanical energy. The source of this energy is high velocity winds and supernova energy from massive stars. In \textit{Enzo}, we implement this as thermal heating in every cell forming stars: $\Gamma _{sf}
=\varepsilon _{SN} \dot {\rho }_{SF} c^2$. The feedback parameter depends on the assumed stellar IMF and the explosion energy of individual supernovae. It is estimated to be in the range $10^{-6}\le \varepsilon _{SN} \le 10^{-5}$ \cite{Cen92}. We treat it as a free parameter.

Fig. \ref{fig10} shows synthetic maps of X-ray surface brightness, temperature, and Compton y-parameter for a $M=2\times 10^{15} M_{\odot}$ cluster at z=0 for the three cases indicated. The ``star formation'' case is omitted because the images are very similar to the ``star formation feedback'' case (see reference
\cite{Motl05}.) The adiabatic cluster shows that the X-ray emission is highly concentrated to the cluster core. The projected temperature distribution shows a lot of substructure, which is true for the adiabatic sample as a whole \cite{Loken02}. A complex virialization shock is toward the edge of the frame. The y-parameter is smooth, relatively symmetric, and centrally concentrated. The inclusion of radiative cooling has a strong effect on the temperature and X-ray maps, but relatively little effect on the SZE map. The significance of this is discussed in Section 5. In simulations with radiative cooling only, dense gas in merging subclusters cools to 10$^{4}$ K and is brought into the cluster core intact \cite{Motl04}. These cold lumps are visible as dark spots in the temperature map. They appear as X-ray bright features. The inclusion of star formation and energy feedback erases these cold lumps, producing maps in all three quantities that resemble slightly smoothed versions of the adiabatic maps. However, an analysis of the radial temperature profiles (Fig. \ref{fig10}) reveals important differences in the cluster core. The temperature continues to rise toward smaller radii in the adiabatic case, while it plummets to $\sim $10$^{4}$ K for the radiative cooling case. While the temperature profile looks qualitatively similar to observations of so-called cooling flow clusters, our central temperature is too low and the X-ray brightness too high. The star formation feedback case converts the cool gas into stars, and yields a temperature profile which follows the UTP at $r\ge 0.15r_{vir} $, but flattens out at smaller radii. This is consistent with the high resolution
\textit{Chandra} observations of Vikhlinin et al. \cite{Vikhlinin04}.

\section{Comparisons and predictions for X-ray and SZE surveys}

In this section we shall compare the results of numerical hydrodynamical simulations with the analytic scaling laws derived in section 3, and compare with observational data. We will see that the X-ray temperature and the integrated SZE are robust indicators of cluster mass with relatively little bias, while the X-ray luminosity is not because we cannot reliably simulate the X-ray emission from clusters.

\subsection{Analytic and numerical comparisons}

\begin{figure}
\centerline{\includegraphics[width=4in,height=2.5in]{fig14.eps}}
\caption{Comparing analytic and numerical predictions for cluster statistics.}
\label{fig11}
\end{figure}

We first ask the question how well do the simple analytic model estimates of cluster statistics agree with the results of numerical hydrodynamic simulations. This question was addressed by Bryan {\&} Norman 1998
\cite{BN98}.
Fig. \ref{fig11}
illustrates how the comparisons are made. For a given cosmological model Press-Schechter theory is used to calculate the halo mass function versus redshift (top rectangle). The observable quantities $n(T,z), n(L_x,z), n(Y,z)$
are then computed using the scaling relations presented in Section 3 for $L_x, T$ and $Y$ as a function of mass. Somewhat more work is involved deriving these results from numerical simulation (bottom rectangle). Initial conditions for the chosen cosmology are generated which specify dark matter and baryonic perturbations at the starting redshift. These perturbations are evolved using the methods described in section 4 to z=0. The particle and baryonic distributions are output at specified redshifts for analysis.
Virialized objects are located using a group-finding algorithm on the dark matter particles list. Two popular techniques are friends-of-friends
\cite{Davis85} and HOP \cite{Eisenstein99}. In the friends-of-friends algorithm, two particles are part of the same group if their separation is less than some chosen value; chains of pairs then define groups. In the HOP algorithm, an estimate of the local density is associated with every particle. Each particle is linked to its densest neighbor and on to that particle's densest neighbor until one reaches the particle which is its own densest neighbor. All particles that are traced to the same such particle define the group. Once groups are found, centers of mass for each group are computed. With these centers determined, spherically averaged profiles of dark matter density, baryon density, temperature, etc. are computed by binning the 3D data into spherical shells. For each halo, the virial radius is determined by finding the shell inside of which the mean total density (dark matter + baryons) equals the critical overdensity $\Delta_c$ (Section 3). Virial mass, X-ray luminosity, and emission weighted temperature are computed by numerical integration over the radial profiles of total density, X-ray emissivity, etc. With these quantities evaluated for each cluster in the sample, distribution functions are then computed.

\subsection{Cluster temperatures}

One of the most robust predictions of numerical simulations is the mass-temperature relation. Fig. \ref{fig12}a shows a comparison between analytic scaling relations and simulations for two cosmological models at three epochs. The simulations were carried out on fixed Eulerian grids of size 270$^{3}$ and 512$^{3}$ assuming the clusters are non-radiative. Good agreement is seen with a slight offset in normalization. Fitting Eq. \ref{eq28} to the data yields $f_T \approx 0.8.$ That the simulations reproduce the analytic scaling relations despite limited numerical resolution is a consequence of energy conservation, which is maintained to high accuracy by the numerical hydrodynamic method employed. Note that a cluster of a given mass is cooler at lower redshifts.

Fig. \ref{fig12}b shows the temperature distribution function as predicted by simulations (histograms) and Press-Schechter theory (curves) for a critically closed model (SCDM) and a low density model (OCDM). Generally,
agreement is good. Simulations underpredict the number of low temperature clusters due to resolution effects. The high temperature clusters are rare,
and thus not many are found in our small box. Despite these numerical limitations, one sees that the number of hot clusters evolves rapidly in the flat universe but evolves very little in the open universe.

Fig. \ref{fig13}a shows the predictions of simulations compared with the observational data of Henry {\&} Arnaud (1991)\cite{Henry91}.
The SCDM model is ruled out with high confidence, while the CHDM and OCDM models are marginally consistent with data. Eke, Cole {\&} Frenk (1996) \cite{ecf96} showed that with a suitable adjustment of $\sigma _{8}$, critically closed, open, and
$\Lambda $-dominated models could all reproduce the observations
(Fig. \ref{fig13}b).
This illustrates what is known as the $\Omega _{0}-\sigma _{8}$
degeneracy in cluster abundances \cite{Bahcall97}. The redshift evolution of cluster abundances can in principle break this degeneracy; however, this requires large samples of high redshift clusters with accurately measured temperatures. So far, the samples are small.
Temperatures are more difficult to measure than X-ray luminosities.
Nonetheless, available data shows mild evolution of the X-ray temperature function, consistent with a low density universe \cite{Rosati02}.

\begin{figure}[htbp]
\includegraphics[width=3in,height=2in]{fig15.eps}
\includegraphics[width=2.5in,height=2.5in]{Fig12b.eps}
\caption{Left: M-T scaling in a flat $\Omega _{m}$=1 universe (left) and an open $\Omega _{m}$=0.34 universe (right) for z=0, 0.5, and 1 (top to bottom). Symbols are values measured from hydrodynamic simulations. Lines are the scaling relations from Eq. \ref{eq28} with f$_{T}$=0.8 (from \cite{BN98}).
Right: Evolution of cumulative temperature distribution function for the two models shown in Fig 13 as predicted by theory (curves) and hydrodynamic simulations (histograms). The number of hot clusters evolves rapidly in the flat universe but evolves very little in the open universe.}
\label{fig12}
\end{figure}

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=2in]{fig17.eps}
\includegraphics[width=2.5in,height=2.5in]{fig18.eps}
\caption{Left: Comparison of z=0 cluster temperature function from Henry {\&} Arnaud (1991) with hydrodynamic simulations. SCDM model ($\Omega _{0}$=1, $\sigma _{8}$=1.05) is ruled out with high confidence, OCDM model ($\Omega _{0}$=0.34, $\sigma _{8}$=0.75) is marginally consistent with data. (from Bryan {\&} Norman 1998). Right:
Illustration of the $\Omega _{0}-\sigma _{8}$ degeneracy. Good agreement with data is found for flat, open, and $\Lambda $-dominated cosmological models with a suitable adjustment of $\sigma _{8}$. From \cite{ecf96}.}
\label{fig13}
\end{figure}

\subsection{Cluster X-ray luminosities}

The most easily measured property of an X-ray cluster is its luminosity.
However, as we shall see, this is the most difficult quantity to predict using numerical simulations. This is because the integrated X-ray luminosity of a cluster is dominated by emission from the core region, which is challenging to resolve numerically, and it is affected by heating and cooling processes which are as yet not well understood. The advent of multiscale numerical simulation techniques has ameliorated the numerical resolution difficulties. As one can see from Fig. \ref{fig8}f, the X-ray emissivity peaks at about $0.1 r_{vir}$ for the adiabatic Santa Barbara cluster. SPH and AMR simulations can now resolve this scale with ten resolution elements or more in large cosmological volumes. Fig. \ref{fig14} shows the $L_x-M$ and $L_x-T$ scaling relation derived from our large sample of adiabatic galaxy clusters simulated using AMR in a $\Lambda$CDM universe. The numerical clusters are in good agreement with the analytic virial scaling relations $L_x \propto M^{4/3}$ and $L_x \propto T^2$ without resort to resolution corrections
(cf. Bryan {\&} Norman 1998). However, the adiabatic models are in conflict with the observed scaling relations, which are $L_x \propto M^{1.8}$
and $L_x \propto T^3$ for $T >2$ keV \cite{Rosati02}.

\begin{figure}
\includegraphics[width=5in,height=2.5in]{fig19.eps}
\caption{High resolution AMR simulations of adiabatic clusters (red crosses) agree with analytic scaling predictions (red lines), but disagree with observations (black lines). Addition of radiative cooling (blue diamonds) improves agreement, but produces too many clusters with cool cores. Figures courtesy P. Motl.}
\label{fig14}
\end{figure}

The disagreement between the predictions of adiabatic simulations and observations can be taken as strong evidence of the importance of non-adiabatic processes in the cores of galaxy clusters. The effect of radiative cooling is shown by the open diamonds in Fig. \ref{fig14}. Although the $L_x-M$ and $L_x-T$ scaling steepens in the direction of observations, we view these models as unrealistic since every cluster in the sample has too much cold gas in the core, contrary to observations. The scaling relations for the ``star formation'' and ``star formation feedback'' samples are shown in Fig. \ref{fig15}a.
The conversion of cool gas into stars produces clusters whose temperature and X-ray surface brightness profiles are in better agreement with observations, and steepens the $L_x-T$
relation somewhat relative to the adiabatic clusters. The inclusion of supernova heating has a rather minor effect when compared to the magnitude of the change including star formation. This is best illustrated in Fig. \ref{fig15}b,
which shows the scatter of central entropy versus central temperature for the adiabatic, star formation, and star formation feedback cluster samples.
An analysis of a sample of clusters by Ponman et al. (1999)
\cite{Ponman99} revealed the existence of an ``entropy floor''. This feature has been interpreted as evidence of galaxy formation feedback which increases gas entropy. The same data has been explained as the result of radiative cooling \cite{Bryan99,Voit00}
which locks up low entropy gas in stars where it does not contribute to X-ray emission. The magnitude of the entropy floor strongly suggests the heating explanation. The failure of star formation feedback simulations to exhibit the entropy floor may be due to limited mass resolution. The galaxy mass function is not well sampled in these simulations; indeed, only the central dominant galaxy and one or two of the most massive galaxies are present in these simulations. Perhaps higher resolution simulations will improve agreement. AGN heating is another source of energy input that may be important, especially in the cores of clusters \cite{Ruszkowski02}.
Numerical simulations incorporating these effects are in their infancy, and certainly not at the stage where large ensembles can be simulated for statistical analysis.

\begin{figure}[htbp]
\includegraphics[width=2.5in,height=2.5in]{fig20.eps}
\includegraphics[width=2.5in,height=2.5in]{fig21.eps}
\caption{Left: Effect of baryonic physics on the L-T relation for three AMR cluster samples: adiabatic (crosses), star formation (triangles), and star formation feedback (squares). Right: Central entropy versus central temperature for the cluster samples in Fig \ref{fig12}. The dashed line is the observed ``entropy floor''. Figures courtesy P. Motl.}
\label{fig15}
\end{figure}

\subsection{Prospects for SZE cluster surveys}

The sensitivity of X-ray luminosity to numerical resolution and baryonic processes motivates us to look for other more robust indicators of a cluster's mass. Temperature is such an indicator, however this is more difficult to measure than X-ray luminosity even at low redshifts. At high redshifts the task becomes even more difficult because of the severe $(1+z)^{-4}$
surface brightness dimming of the X-ray flux. In this section we explore the thermal SZE as a mass indicator based on our four catalogs of simulated galaxy clusters. Based on these models, we find that the integrated SZE $y_{500}$ is a less biased indicator of cluster mass than either the X-ray luminosity or temperature, and shows far less scatter than the central value of the SZE intensity change $y_0$. More details can be found in references
\cite{Motl05,Hallman05}.

\begin{figure}[htbp]
\includegraphics[width=3in,height=2in]{fig22.eps}
\includegraphics[width=2in,height=2in]{fig23.eps}
\caption{Left: The ``lightcurve'' for the central value of the Compton parameter,
$y_0$, obtained from tracking one particular halo from a redshift of 4 to the present epoch. Major mergers can boost $y_0$ by a factor of 10. Right:
Projected y parameter distribution of cluster at the epochs marked by vertical lines in the lightcurve. Figures courtesy P. Motl.}
\label{fig16}
\end{figure}

As has been discussed elsewhere in this volume (Rephaeli, Birkinshaw), the thermal SZE is an attractive cosmological probe because it is redshift independent. The strength of the SZE is proportional to the Compton parameter, y, which for non-relativistic electrons is essentially the integral of the gas pressure through the cluster
\begin{equation}
y=\int {\frac{k_B T}{m_e c^2}} \sigma _T n_e d\ell \propto \int {nTd\ell .}
\end{equation}
The central value of the Compton y parameter we refer to as $y_0$. We define the integrated SZE $y_{500}$ as the area integral of the y parameter out to
$r_{500}$, the radius inside of which the mean density is 500 times the critical density:
\begin{equation}
y_{500} =2\pi \int\limits_0^{r_{500} } {y(r)rdr.}
\end{equation}
The detectability of a cluster is given by its SZ cross section (Section 3),
which is essentially $y_{500} /d_A^2 \propto (1+z)^{-2}$. This is a far more favorable redshift dependence than X-rays provide.

Fig. \ref{fig16}a shows the redshift evolution of $y_0$ for the most massive cluster in our sample. As can be seen, $y_0$ exhibits a secular increase as the cluster potential deepens, but is boosted by up to a factor of $\sim $20(2) during major(minor) merger events. The duration of these events is of order the dynamical time $\sim $1-2 Gyr. The effect of mergers induces considerable scatter into scaling between $y_0$ and the enclosed mass $M_{500}$ in our sample of clusters at z=0 (Fig. \ref{fig17}a). By contrast, $y_{500}$
shows a much tighter correlation (Fig. \ref{fig17}b). The reason for this is illustrated in the lower two panels of Fig. \ref{fig17} where we plot the central value of the gas pressure $p_0$ and the volume averaged pressure $p_{500}
=\frac{3}{4\pi r_{500}^3 }\int\limits_0^{r_{500} } {p(\vec {x})d^3\vec {x}}
$. The central pressure exhibits large scatter due to the presence of shock waves induced by mergers. However, the volume averaged pressure exhibits relatively little scatter. This is a consequence of virial equilibrium and tells us that the clusters are approximately in equilibrium within $r_{500}$.

Fitting the data to a power law of the form
\begin{equation}
y_{500} =A\left[ {\frac{M_{500} }{10^{14}M_{\odot}}} \right]^\alpha
\end{equation}
for each of our 4 catalogs, we find $\alpha \sim 1.6, \sigma_{\alpha}\sim 0.025$
for the adiabatic, star formation, and star formation feedback samples, and
$\alpha \sim 1.7, \sigma_{\alpha}\sim 0.03$
for the radiative cooling sample. The scaling exponent is consistent with the findings of da Silva et al (2004) \cite{daSilva04}.
Ignoring the radiative cooling only runs as unrealistic, we find that the scaling is relatively insensitive to baryonic physics. This is both reassuring and understandable in that regardless of the thermodynamics of the gas,
hydrostatic equilibrium is maintained to a good approximation. By looking back through our catalogs in redshift, we find that the coefficient A is independent of redshift.

\begin{figure}
\centerline{\includegraphics[width=5in,height=4in]{Fig17b.eps}}
\caption{Upper: The scaling relations between $y_0$ and $y_{500}$ and the total cluster mass within the same radius at z=0 for the star formation with feedback cluster sample. Two randomly chosen, orthogonal projections for each cluster are plotted as individual points and the catalog contains $\sim 100$ clusters at this epoch in the mass range $1 \times 10^{14} M_{\odot} \leq M_{200} \leq 2 \times 10^{15} M_{\odot}$. The best fit relations are plotted as solid lines. Lower: Central pressure and pressure integrated inside sphere of radius
$r_{500}$ plotted against cluster total mass. From \cite{Motl05}.}
\label{fig17}
\end{figure}

\subsection{Cluster mass estimates compared}

To assess the systematic biases and relative scatter of various means of estimating cluster masses from X-ray and SZE data, we ``observed'' our four cluster samples and analyzed the resulting synthetic images in the same way as observations. Our goal was to find both the best cluster mass estimator and best method of analysis. These were defined as the combination which produces the least bias and smallest scatter between inferred cluster mass and actual (simulated) mass. Here we merely summarize our findings; for details the reader is referred to \cite{Hallman05}.

Cluster masses can be obtained from X-ray and thermal SZE observations in several ways. The most widely used is the isothermal beta model, wherein it is assumed the electron number density is spherically symmetric and follows

\begin{equation}\label{eq41}
n_e (r)=n_{e0} \left[ {1+\left( {\frac{r}{r_c }} \right)^2} \right]^{-3\beta
/2},
\end{equation}

where $n_{e0}$ is the central electron density. Approximating the gas as isothermal with average temperature $\langle T \rangle$ within the fitting radius, then the X-ray surface brightness is
\begin{equation}\label{eq42}
S_X (r)=S_{X0} \left[ {1+\left( {\frac{r}{r_c }} \right)^2}
\right]^{\frac{1}{2}-3\beta }
\end{equation}
where $S_{X0} \propto n_{e0}^2 \left\langle T \right\rangle ^{\frac{1}{2}}$.
Similarly for the SZE, a beta model density distribution results in a projected radial distribution for the Compton y parameter
\begin{equation}\label{eq43}
y(r)=y_0 \left[ {1+\left( {\frac{r}{r_c }} \right)^2}
\right]^{\frac{1}{2}-\frac{3\beta }{2}}
\end{equation}
where $y_{0} \propto n_{e0} \left\langle T \right\rangle.$

By fitting the observed profiles of $S_X(r)$ and $y(r)$ one obtains
$\beta$ and $r_c$, the core radius. With $\left\langle T \right\rangle$
measured observationally, $n_{e0}$ can then be calculated.
One then integrates Eq. \ref{eq41} to find the gas mass within the fitting radius $r_<$. The cluster dynamical mass is then $M_{dyn} (r_< )=M_{gas} (r_<
)/f_b (r_< ),$ where $f_b$ is the baryon fraction which may in general be different from the cosmic mean $\Omega_b/\Omega_m$ depending upon the radius. Henceforth we will refer to mass estimates made in this way as X-ray-ISO and SZE-ISO.

Recently it has been shown both in simulations (Loken et al. 2002, Section 4)
and in X-ray observations (Vikhlinin et al. 2005) that clusters are not isothermal at large radii, but follow a universal temperature profile (UTP)
\begin{equation}\label{eq43a}
T(r)=\left\langle T \right\rangle _{500} \left[ {1+\left( {\frac{r}{\alpha r_{500} }} \right)^2} \right]^{-\delta }
\end{equation}
where $\langle T \rangle_{500}$ is the average temperature inside
$r_{500}$, and $\alpha$ and $\delta$ are fitting parameters determined from a large sample of clusters. Improved mass estimates can be obtained by geometric deprojection of the X-ray and SZE profiles if one knows the temperature of each radial shell. This is provided by the UTP. For example, the X-ray surface brightness can be deprojected to yield the X-ray emissivity in each spherical shell (e.g., \cite{Buote00}).
Knowing the temperature profile, one can obtain the mass in each shell. A similar technique can be applied to the SZE profile. By summing over shells,
one obtains the gas mass within the fitting radius. Mass estimates obtained in this way we refer to as X-ray-UTP and SZE-UTP.

\begin{figure}
\centerline{\includegraphics[width=4in,height=2.5in]{fig25.eps}}
\caption{Comparison of median values and scatter of gas mass estimates inside
$r_{500}$ for full SFF cluster sample (triangles) and cleaned SFF sample
(diamonds) at z=0 for each of four methods: UTP-X-ray (U-X), UTP-SZE (U-SZ),
isothermal X-ray (I-X), and isothermal SZE (I-SZ) as described in the text.
From \cite{Hallman05}.}
\label{fig18}
\end{figure}

Fig. \ref{fig18} shows the ratio of the measured mass to the actual mass for the star formation feedback catalog of simulated clusters for the four methods described above. The triangles are the full sample, whereas the diamonds are for samples which have been cleaned of highly distorted clusters resulting from recent mergers. The error bars enclose the 80{\%} confidence range. As can be seen, cleaning the sample reduces the scatter considerably. Among the different methods, the X-ray measurements yield the smallest scatter, but overestimate the cluster masses by 5-10{\%}. Conversely, the SZE-UTP measurements yield unbiased estimates of the cluster mass, with somewhat more scatter. As shown in \cite{Hallman05}, the scatter in the SZE estimates decreases as the fitting radius is increased to $r_{200}$, while no improvement is seen in the X-ray estimates. This is to be expected since the X-ray emission is heavily core-weighted, while the SZE samples larger radii.

\subsection{Conclusions}

We have seen that galaxy clusters are sensitive cosmological probes provided their masses can be measured with precision. Both analytic estimates and numerical simulations show that the evolution of their comoving number density is sensitive to cosmology. With improvements in X-ray observations and impending large area surveys to detect clusters via the SZE, it is paramount to assess the accuracy to which cluster masses can be obtained observationally. Based on our catalogs of simulated clusters using adaptive mesh refinement, we find that gas masses can be measured to $\sim $10{\%}
accuracy with 80{\%} confidence. Our study ignores instrumental or other observational effects. These limits in precision are a direct result of the deviation of the simulated clusters from simple assumptions about their physical and thermodynamic properties, dynamical state, and sphericity.
Comparing a variety of methods, we find that SZE methods assuming a UTP produce the smallest scatter when estimating masses from a raw sample of clusters. Cleaning the cluster sample of obvious mergers does not improve the SZE estimates much, but improves the X-ray estimates substantially. As a practical matter, we find SZE methods are superior for mass estimation of large samples of clusters out to high redshift. This is particularly true if the cutoff radius is the virial radius, as this has the effect of smoothing out any boosting effects in the cluster core due to mergers.

Comparing mass estimates from our four catalogs, we find that our conclusions are insensitive to assumed baryonic physics, except for the cooling sample, which yields unrealistic-looking clusters. Mass estimates derived from the cooling sample are systematically high (50-100{\%}) despite excising the overluminous X-ray core. Reasons for this are discussed in detail in reference \cite{Hallman05}.
We conclude that cool core clusters are poor candidates for precision mass estimation, in disagreement with previous studies \cite{Allen98}.

\acknowledgments The author is indebted to his collaborators Greg Bryan,
Jack Burns, Eric Hallman, Chris Loken, and Patrick Motl whose results, both published and unpublished, are presented here. Simulations were performed at the National Center for Supercomputing Applications of the University of Illinois, Urbana-Champaign with support from NSF grants ASC-9318185,
AST-09803137.

\end{document}
\endinput
\title{One-way quantum computation with four-dimensional photonic qudits}

\begin{abstract}

We consider the possibility of performing linear optical quantum computation making use of extra photonic degrees of freedom. In particular we focus on the case where we use photons as quadbits,
4-dimensional photonic qudits. The basic 2-quadbit cluster state is a hyper-entangled state across polarization and two spatial mode degrees of freedom. We examine the non-deterministic methods whereby such states can be created from single photons and/or Bell pairs, and then give some mechanisms for performing higher-dimensional fusion gates.

\end{abstract}

\section{Introduction}
\label{Intro} Optical quantum computation is a strong candidate for a scalable quantum computer. Photons have low decoherence rates, and high fidelity optical components are readily available. In this article we focus on the linear optical quantum computation (LOQC) paradigm, for which the resource overheads of the original LOQC proposal \cite{Knill01} have been greatly reduced by making use \cite{Nielsen04,
Browne05,Kok,kieling06,kieling07,Gross,Rohde,
Dawson06,Dawson062,Gilbert,Zhang,varnava} of the one-way quantum computation model \cite{Briegel01,Raussendorf01}.

Significant hurdles to practical LOQC remain, however. At present the primary obstacle is a deterministic source of photons. Much progress has been made along these lines
\cite{Walther05,everyone}, but it is clear that there is still a long way to go. Particularly exciting is the possibility of creating ``on-demand'' entangled pairs of photons \cite{toshiba,
gershoni}, which obviate the need for initially creating such entangled pairs from single photons \cite{pan}. The investigations of this paper are based around an assumption that at some time in the near future efficient deterministic sources of either single photons or entangled photon pairs will become available.

It is not always obvious how to compare the resource requirements of various different proposals for implementing LOQC within the cluster state paradigm (e.g. how many single photon sources,
memory units and feedforward steps is an entangled pair source
``worth''?). Since the primary difficulties for LOQC relate to sources and detectors, it is clear that schemes which reduce the number of photons actually used in an implementation are desirable \footnote{In general, within the circuit model, the results of \cite{Muthukrishnan} suggest that quantum computation with quadbits can be expected to result in a space saving of at least $O(\log_2 d)$, and a time saving of at least $O(\log_2 d)^2$. The extent to which such savings translate into the cluster state model is largely unexplored - they will depend on optimal decompositions of qudit cluster circuits for general two-qudit unitary operations. Such optimal decompositions are not completely characterized for even the qubit case yet.}. A travelling photonic wavepacket is in principle a multi-mode creature, and thus can be treated as a $d$-dimensional quantum system (a ``qu$d$it''). There is a $d$-dimensional version of cluster state computing \cite{Zhou03, BillHall}, and one purpose of this paper is to explore procedures whereby such
$d$-dimensional clusters can be created. The second motive is to examine some basic ``initial state'' resource tradeoffs, such as:
``how many Bell pairs does it take to make a hyper-entangled state''.

For concreteness we focus on the \emph{quadbit} case -
specifically, we treat a single photon as a four-dimensional quantum system; using the two polarization states of two different spatial modes to encode the four levels.

\section{Quadbit cluster states}
\label{Sec:01}
\subsection{General quadbit cluster states}
\label{Sec:0101}

In this section we review the features of quadbit cluster states we shall make use of - a pedagogical overview of the higher-dimensional cluster state computing can be found in
\cite{BillHall}.

We label the computational basis states $\{|\bar0 \rangle ,|\bar1 \rangle,
|\bar2 \rangle ,|\bar3 \rangle \}$ (use of the overbar is to prevent confusion with 0 and 1 photon Fock states). In terms of these we can define the quadbit version of a Hadamard rotation, which rotates the computational basis state $|\bar i\rangle$ to $|+_i\rangle$ ($i=0,1,2,3$), where
\begin{eqnarray}
\label{five:eq03} | +_{i} \rangle &=& {1\over 2} ( |\bar0 \rangle
+ {\rm e}^{{\rm i} {i \pi \over 2}} |\bar1 \rangle + {\rm e}^{{\rm i} \,i \pi}|\bar2 \rangle + {\rm e}^{{\rm i} {3 i \pi
\over 2}} |\bar3 \rangle),
\end{eqnarray}

A 2-quadbit cluster state $|QdC_2\rangle$ is then given by the superposition
\[|QdC_2\rangle={1 \over 2}\sum_{i=0}^3|\bar i\rangle|+_i\rangle,\] which should be compared with the equivalent 2-qubit cluster state
$|C_2\rangle=(|0\rangle|+\rangle+|1\rangle|-\rangle)/\sqrt{2}$. In the case of qubits a two-qubit (non-destructive) parity gate operation would fuse
\cite{Browne05} two 2-qubit clusters into the state
$|C_3\rangle=(|+\rangle|00\rangle|+\rangle+|-\rangle|11\rangle|-\rangle)/\sqrt{2}$, and repeating such fusion operations allows for the growth of arbitrary cluster states (the redundant encoding of the central qubit is easily removed by a measurement in the $|\pm\rangle$ basis, yielding the 3-qubit cluster state as claimed). Similarly, in the quadbit cluster case arbitrary quadbit clusters can be grown using a quadbit fusion operation. Applied to two 2-quadbit clusters such a fusion would achieve the state
$|QdC_3\rangle=\sum_{i=0}^3|+_i\rangle|\bar i \bar i\rangle|+_i\rangle/2$.

\subsection{Optical quadbit cluster states}
\label{Sec:0102}

We define a quadbit single photon quantum state in two polarization/spatial modes as follows:
\begin{eqnarray}
\label{five:eq01} |\bar0 \rangle &\equiv& |H \rangle_1 \,
,~|\bar1 \rangle \equiv |V \rangle_1 \, , ~ |\bar2 \rangle \equiv
|H \rangle_{2} \, ,~|\bar3 \rangle \equiv |V \rangle_{2} \, ,~~~~~
\end{eqnarray}
where $H (V)$ denotes horizontal (vertical) polarization, and the subindex $1 (2)$ denotes spatial mode $k_1$($k_2$).

Consider now a so-called hyper-entangled state (HES)
\cite{kwiat01}, which is a two-photon state entangled in both polarization and spatial modes. Two-photon HES's can be generated by spontaneous parametric down-conversion \cite{kwiat}.
As with generation of single photons, such a mechanism of HES production is not scalable. In Section \ref{Sec:0201}, we will consider scalable production of HES's given deterministic single photon sources or entangled pairs. It is possible to represent an HES as a product of Bell states, with a virtual tensor product structure between the spatial and polarization modes, for example:
\begin{eqnarray}
\label{five:eq05}
|\Phi^{+}_{\rm HES} \rangle &=& {1 \over 2} ( | H \rangle | H
\rangle + |V \rangle |V \rangle)\otimes(|1\rangle|3\rangle +
|2\rangle|4\rangle).~~~~~
\end{eqnarray}
(We will always use the $\otimes$ symbol to refer to this virtual tensor product of spatial modes and polarizations). Using the identification in Eq.~(\ref{five:eq01}), we see that
$|\Phi^{+}_{\rm HES} \rangle$ is equal to
\begin{eqnarray*}
\label{five:eq06} |\Phi^{+}_{\rm HES} \rangle &=& {1\over 2}
(|H\rangle_1|H\rangle_3+|V\rangle_1|V\rangle_3+|H\rangle_2|H\rangle_4+|V\rangle_2|V\rangle_4)
\\&=& {1\over 2} (
|\bar0 \rangle |\bar0 \rangle + |\bar1 \rangle |\bar1 \rangle +
|\bar2 \rangle |\bar2 \rangle + |\bar3 \rangle |\bar3 \rangle).
\end{eqnarray*}
As any single mode unitary operation can be implemented with linear optics \cite{reck}, a simple circuit can be constructed which rotates the quadbit in modes 3 and 4 to yield the optical 2-quadbit cluster state $|QdC_2\rangle$ defined above.

Consider attempting to fuse two 2-quadbit clusters, the first in modes (1,2;3,4) (as in Eq.(\ref{five:eq05})), the second in modes
(5,6;7,8). The procedure required to fuse the quadbit in spatial modes 1,2 with that in 5,6 is a gate which (when successful)
performs a projective measurement of the form:
\[
|HH\rangle_{1\,5}\langle HH|+|VV\rangle_{1\,5}\langle VV|+|HH\rangle_{2\,6}\langle HH|+|VV\rangle_{2\,6}\langle VV|.
\]
That is, a successful measurement should reveal ``the photons were in corresponding spatial modes with the same polarization'',
but should not reveal in which spatial modes and with what polarization. In section \ref{Sec:03} we will show that such a fusion is possible, although we have only found methods of doing it that make use of ancillary systems, and for which the success probability strongly depends on the nature of the ancillas available.

\section{Generation of quadbit cluster states}
\label{Sec:02}

Before discussing possible fusion mechanisms, we turn to examining some
``initial state resource tradeoffs''. This is because, as in the case of single photons, parametric downconversion is not a suitable source for scalable LOQC. Therefore we may well need to generate deterministic HES's from a deterministic source of either single photons, Bell pairs or GHZ states. Whether the constructions we give are optimal (or even close to being so) we cannot determine. Thus the procedures we present should be seen as simply giving upper bounds on the resources required. Also,
because the most efficient fusion gate we will present for quadbit clusters destroys the photons involved (much like Type-II fusion for qubits) we will need to look at mechanisms for generating an initial resource of 3 and 4 quadbit cluster states.

Basic notation for the figures, and a brief outline of the operation of the fundamental optical components is set out in Appendix A.

\subsection{General procedure for HES generation}
\label{Sec:0201}

The general circuit we present (Fig.\ref{fig:J2}) is built from two copies of a sub-circuit we label $J_1$, and we first explain the operation of this circuit.

The circuit $J_1$ consists of three beam splitters (BSs) with two vacuum inputs. Consider the case where a Bell state $(|H\rangle_1
|H\rangle_2+|V\rangle_1|V\rangle_2)/\sqrt{2}$ is input into $J_1$. The first BS creates a bunched two-photon state in modes 1 and 2, and then two vacuum inputs are applied from modes $1'$ and $2'$ with two regular BSs. After the circuit $J_1$, the state of two photons in mode 1, $1'$, $2$, and $2'$ is equal to
\begin{eqnarray}
\label{six:eq02} &&|M \rangle_{121'2'} = {1 \over 4} ( | H \rangle
| H \rangle + |V \rangle |V \rangle) \nonumber \\ &&~~~~~~~\otimes
\sum_{j=1}^{2}\big[{\rm e}^{{\rm i}j\pi}(|j\rangle|j\rangle +|j'\rangle|j'\rangle +
\sqrt{2} |j\rangle|j'\rangle ) \big].~~~~~~~
\end{eqnarray}
It is a combination of four states of bunched photon pairs in a spatial mode ($|j\rangle|j\rangle$ and $|j'\rangle|j'\rangle$)
and two anti-bunched states in two different spatial modes ($|j\rangle|j'\rangle$).

\begin{center}
\begin{figure}[h]
\resizebox{!}{6.5cm} {\includegraphics{J2} } \caption{(color online) \label{fig:J2} (a) Circuit $J_1$ (b) Circuit $J_2$ for a hyper-entangled state from four entangled photons}
\end{figure}
\end{center}

We turn now to the full circuit $J_2$ depicted in Figure
\ref{fig:J2} (b). At the centre of the circuit is a source $S_2$
which can be either single photons, Bell pairs or a 4-photon GHZ state. This source is then fed into two copies of the $J_1$ gate,
the outputs of which impinge on 50:50 beam splitters as shown. It is easiest to begin with the case that the source consists of two Bell pairs.

The initial state of the two Bell pairs $|{\Phi^{+}} \rangle_{1\,2}
|{\Phi^{+}} \rangle_{3\,4}$ is
\begin{eqnarray}
\label{six:eq04} && {1 \over 2} ( | H \rangle_1 | H \rangle_2 +
|V \rangle_1 |V\rangle_2 )( | H \rangle_3 | H \rangle_4 + |V
\rangle_3 |V\rangle_4 ).
\end{eqnarray}
According to Eq. (\ref{six:eq02}), the state after the two $J_1$
circuits is equal to
\begin{eqnarray}
\label{six:eq045} && \hspace{-1cm} |M \rangle_{1\,2\,1'\,2'} |M
\rangle_{3\,4\,3'\,4'}\nonumber \\
&=& {1 \over 16}( | H \rangle | H \rangle + |V \rangle |V
\rangle)( | H \rangle | H \rangle + |V \rangle |V \rangle)
\nonumber \\&& ~~~\otimes \sum_{j=1}^{2}\big[{\rm e}^{{\rm i}j\pi}(|j\rangle|j\rangle +|j'\rangle|j'\rangle + \sqrt{2}
|j\rangle|j'\rangle ) \big] \nonumber \\&& ~~~~~~
\sum_{k=3}^{4}\big[{\rm e}^{{\rm i}k\pi}(|k\rangle|k\rangle
+|k'\rangle|k'\rangle + \sqrt{2} |k\rangle|k'\rangle ) \big].~~~
\end{eqnarray}

At the end of the $J_1$ circuits, two BSs are applied in modes
$1',4'$ and $2',3'$, after which detectors are located. Successful operation occurs when two identically polarized photons are detected in modes $1', 4'$ or $2', 3'$ respectively, and the success probability of the detection pattern is 1/16. To see how this works, note that it is the components of the state in Eq.
(\ref{six:eq045}) which consist of two bunched photons
($|j'\rangle|j'\rangle$ and $|k'\rangle|k'\rangle$) that can yield successful detection : the anti-bunched photonic states
($|j\rangle|j'\rangle$ and $|k\rangle|k'\rangle$) result in destructive interference. For example, if we detect two horizontal photons in modes $1'$ and $4'$ but nothing in modes
$2'$ and $3'$, the outcome state is
\begin{eqnarray}
\label{six:eq06} | \psi'_{\rm HES} \rangle &=& {1 \over 2
\sqrt{2}} ( | H
\rangle | H \rangle + |V \rangle |V \rangle) \nonumber \\
&& \otimes (|1\rangle|1\rangle - |2\rangle|2\rangle +
|3\rangle|3\rangle - |4\rangle|4\rangle).~~~~~
\end{eqnarray}
This state is, up to a linear optical transformation (in this case two BSs in mode 1 and 2 and mode 3 and 4), a hyper-entangled state.

It is interesting to note that the failure outcomes can still yield photons in useful states. In particular the failure outcome where only the vacuum is detected leaves all the photons still in two Bell pairs; this occurs with probability 1/16, and obviously the gate can then simply be repeated. This suggests the overall success probability is essentially 1/8. Some of the detection patterns, while not yielding an HES, do still leave two of the photons in a Bell pair, which could be recycled.

We are also able to use for the source a four-qubit GHZ state of the form $(|HHHH\rangle_{1234}+|VVVV\rangle_{1234})/\sqrt{2}$ rather than two Bell pairs; this yields a higher success probability. This also has the advantage that in this case we need not assume the four detectors are polarization sensitive : they need only count numbers of photons at the output of the primed modes. Upon successful detection, when two photons are detected in any two spatial modes, the state in modes 1 to 4 becomes a HES with a success probability 3/16, which is higher than the case of two Bell pairs. Interestingly, no photon detection yields a 4-photon entangled state such as $(|\Phi^{+}\rangle_{12}
|\Phi^{+}\rangle_{34}+|\Psi^{+}\rangle_{12} |\Psi^{+}\rangle_{34})/\sqrt{2}$.

Finally, if we wish to create a HES ballistically from single photons, then we can replace the two Bell pairs input at the source $S_2$ by two copies of the circuit for generating a Bell pair from 4 single photons (Figure \ref{NewS01} in Appendix
\ref{Append03}). In this case we find that the success probability is $1/16^3$.

\subsection{Generating larger quadbit cluster states}
\label{Sec:0202}
\subsubsection{3 quadbit cluster state}
\label{Sec:020201}
\begin{figure}[h]
\centering
\includegraphics[width= 9cm]{MQF02} \vspace{-1cm}
\caption{ \label{MQF02} (color online) Circuit $K_1$ for a 3 quadbit cluster state from two HESs}
\end{figure}

To create a 3 quadbit cluster state, we use the ``modified quantum filter'' (MQF) scheme we present in Appendix \ref{Append0202}.
This circuit implements a parity gate between the input photons in a manner which does not destroy the input photons when it is successful, and moreover is unaffected by situations wherein one of the input modes is empty.

Our circuit for generating a 3 quadbit cluster from two HES's is depicted in Figure \ref{MQF02}. $S_1$ and $S_2$ are sources of initial HESs each in $|\Phi^{+}_{\rm HES} \rangle$. Note that there is \emph{one} photon spread across spatial modes (3,4) and one photon spread across spatial modes (5,6) - the circuit is a two-photon gate, and only one photon will be detected - this is reminiscent of fusing together two Bell pairs by Type-I fusion to create a 3 qubit GHZ (cluster) state, and in fact this gate does act as a Type-I fusion gate for quadbits. After a successful operation in modes 3 and 5 of the MQF, the outcome state is equal to
\begin{eqnarray}
\label{seven:eq02} && {\sqrt{2} \over 6} \big[ |H\rangle_1
|H\rangle_3 |H\rangle_5 |H\rangle_7 + |V\rangle_1 |V\rangle_3
|V\rangle_5 |V\rangle_7 \nonumber \\ &&~+ 2 (|H\rangle_2
|H\rangle_4 + |V\rangle_2 |V\rangle_4) (|H\rangle_6
|H\rangle_8 + |V\rangle_6 |V\rangle_8 ) \big], \nonumber \\
\end{eqnarray}
(the measurement operator for operation of the MQF's is presented in Eq. (\ref{QF04}) in Appendix \ref{Append0202}). Note that in Eq.
(\ref{seven:eq02}), the first two terms contain a photon in mode 3 and the other terms also have a photon in mode 6 (these are the modes which will be detected). After a polarizing beam splitter
(PBS) between modes 4 and 6, two $R_{\pi/4}$s, and a BS in mode 3 and 6, detection of a photon in either mode 3 or 6 results in successful gate operation. The outcome state results from only four terms in Eq. (\ref{seven:eq02}), such as $|H\rangle_1
|H\rangle_3 |H\rangle_5 |H\rangle_7 $, $|V\rangle_1 |V\rangle_3
|V\rangle_5 |V\rangle_7$, $|H\rangle_2 |H\rangle_4 |H\rangle_6
|H\rangle_8$, and $|V\rangle_2 |V\rangle_4 |V\rangle_6
|V\rangle_8$. The extra beam splitter (${\rm BS}_{3/4}$) with vacuum input in mode 4 balances amplitudes in the final state.
For example, after a successful detection in the MQF, the detection of a vertical photon in mode 3 and vacuum in modes 6 and $4$ yields a final state
\begin{eqnarray}
\label{seven:eq03} |{ QdC'_3} \rangle &=& {1\over 2} \big(
|H\rangle_1 |H\rangle_5 |H\rangle_7 - |V\rangle_1 |V\rangle_5
|V\rangle_7 \nonumber \\ &&~~~+ |H\rangle_2 |H\rangle_4
|H\rangle_8 - |V\rangle_2
|V\rangle_4 |V\rangle_8 \big), \nonumber \\
&=& {1\over 2} \big( |\bar0 \rangle |\bar0 \rangle |\bar0 \rangle
- |\bar1 \rangle |\bar1 \rangle |\bar1 \rangle + |\bar2 \rangle
|\bar2 \rangle |\bar2 \rangle - |\bar3 \rangle |\bar3 \rangle
|\bar3 \rangle \big),\nonumber \\
\end{eqnarray}
where the set $\{|\bar0 \rangle, |\bar1 \rangle, |\bar2 \rangle,
|\bar3 \rangle\}$ is defined by $ \{|H\rangle_1$, $|V\rangle_1$,
$|H\rangle_2$, $|V\rangle_2\}$, $ \{|H\rangle_5$, $|V\rangle_5$,
$|H\rangle_4$, $|V\rangle_4\}$, and $ \{|H\rangle_7$,
$|V\rangle_7$, $|H\rangle_8$, $|V\rangle_8\}$.

When the generalized quadbit Hadamard operation and a phase shift are employed on a vertical photon in mode 5 and 4, the outcome state is equivalent to a 3-quadbit cluster state $|{
QdC_3}\rangle$ in Section \ref{Sec:0101}. Therefore, we obtain a three-quadbit cluster state in modes 1,2,4,5,7, and 8 with success probability 1/256.

\begin{figure}[h]
\centering
\includegraphics[width= 9cm]{MQF03} \vspace{-1.5cm}
\caption{ \label{7th} (color online) Circuit $K_2$ for a 4-quadbit cluster state from 2 hyper-entangled pairs}
\end{figure}

\subsubsection{4 quadbit cluster state}
\label{Sec:020202} A slight modification of the circuit in the previous subsection can easily build a 4-quadbit cluster state.
We start from the intermediate state in Eq. (\ref{seven:eq02})
(see Figure \ref{7th}). Because the state does not contain an input of vacuum states in mode 4 and 6, the original QF can be used (see Appendix \ref{Append0202}). When the original QF is successfully applied in modes 4 and 6 to the outcome in Eq.
(\ref{seven:eq02}), the final state is equal to
\begin{eqnarray}
\label{seven:eq05} |{\rm QdC'_4}\rangle &=& {1\over 2} \big(
|H\rangle_1 |H\rangle_3 |H\rangle_5 |H\rangle_7 + |V\rangle_1
|V\rangle_3 |V\rangle_5 |V\rangle_7 \nonumber \\ && ~+
|H\rangle_2 |H\rangle_4 |H\rangle_6 |H\rangle_8 + |V\rangle_2
|V\rangle_4 |V\rangle_6 |V\rangle_8 \big).\nonumber \\
\end{eqnarray}
This is equivalent to
\begin{eqnarray}
\label{seven:eq055} |{\rm QdC_4}\rangle &=& {1\over 2}
\sum^{3}_{d=0} | +_{d} \rangle | \bar{d} \rangle | +_{d} \rangle
| +_{d} \rangle,
\end{eqnarray}
up to a local operation on the second photon. Note this is a 4-quadbit state of ``star'' form - i.e.\ a central quadbit with three leaves, and thus is useful for creating quadbit clusters with nontrivial topology.

From the resource point of view, two hyper-entangled states and six single photons (four horizontal and two vertical photons) are used to create such a 4 quadbit cluster with success probability 1/1024.

\section{Fusing quadbit cluster states}
\label{Sec:03}

\begin{center}
\begin{figure}[h]
\centering
\includegraphics[width=11cm]{T203} \caption{ \label{8th}
(color online) Circuit $K_3$ of a Type2-like fusion gate on two hyper-entangled pairs}
\end{figure}
\end{center}

In order to perform optical quadbit one-way quantum computation,
we require a procedure for building large multi-quadbit cluster states. The Type-I style gate (of section \ref{Sec:020201}) could be used; however its success probability is very low.

In Figure \ref{8th}, we present a Type-II-like fusion gate between two quadbit cluster states. The total circuit is comprised of two sub-circuits we label $T_3$, consisting of two four-port interferometers. The operation of the $T_3$ gate is discussed in Appendix \ref{Append0201}. The basic effect of gate
$T_3$ is to destroy the spatial mode information carried by the photons while leaving their polarization information intact.

As shown in Figure \ref{8th}, the initial state is prepared in
$|\Phi^{+}_{\rm HES} \rangle_{1\,2\,3\,4} |\Phi^{+}_{\rm HES}
\rangle_{5\,6\,7\,8}$. What we desire of this gate is that when it succeeds it tells us ``the photons were either in modes 3 and 5 or they were in modes 4 and 6, and their polarization was the same''. However it should not reveal in which pair of spatial modes they were, and with what polarization.

After two $R_{\pi/2}$s in mode 5 and 6, the intermediate state is
\begin{eqnarray}
\label{seven:eq06} &&{1 \over 4} ( | H \rangle_1 | H \rangle_3 +
|V \rangle_1 |V\rangle_3 + | H \rangle_2 | H \rangle_4 + |V
\rangle_2 |V\rangle_4 )\nonumber \\
&&~~~( | V \rangle_5 | H \rangle_7 + |H \rangle_5 |V\rangle_7 + |
V \rangle_6 | H \rangle_8 + |H \rangle_6 |V\rangle_8 ).~~~~~~~~
\end{eqnarray}
Based on the discussion in Appendix \ref{Append0201}, if the upper
$T_3$ gate is implemented (without extra photons in modes $3'$,
$5'$) and a successful detection occurs (i.e. a single horizontal and a single vertical photon are detected in two of the modes 3,
5, $3'$, and $5'$), it generates the Bell state in modes 1 and 7:
\begin{eqnarray}
\label{seven:eq07} (| H \rangle_{1} | H \rangle_{7} \pm |V
\rangle_{1} |V \rangle_{7})/\sqrt{2}.
\end{eqnarray}
Note that the parts of the input state with amplitude in modes 4,
6 are then wiped out.

On the other hand, if the lower $T_3$ gate detects one horizontal and one vertical photon, originating from modes 4 and 6, it makes a Bell state in mode 2 and 8
\begin{eqnarray}
\label{seven:eq08} (| H \rangle_{2} | H \rangle_{8} \pm |V
\rangle_{2} |V \rangle_{8})/\sqrt{2}.
\end{eqnarray}
and amplitude for modes 3 and 5 is wiped out.

We essentially desire both of these $T_3$ gates to be able to succeed simultaneously and indistinguishably. In order to attain this, extra photons are injected into the spatial modes $3'$,
$4'$, $5'$, and $6'$. We will consider various possible initial states for these ancillary photons. The basic idea is that indistinguishable events occur if two photons in different polarizations are detected in both the upper and lower $T_3$
gates simultaneously. These events can arise from either the ancillary photons or the `actual inputs' - and our lack of knowledge about which possibility occurs gives an amplitude for both $T_3$ gates working. The success probability relies on the input state of the extra two photons, and we discuss several possibilities.

The first case is that two single photons are injected in mode
$3'$, $4'$, $5'$, and $6'$ in the state
\begin{eqnarray}
\label{seven:eq09} |{\rm Ex_1}\rangle &=& {1 \over 4} ( | H
\rangle_{3'} + |V \rangle_{3'} + | H \rangle_{4'} + |V
\rangle_{4'} )\,\nonumber \\&& ~~~~~( -| H \rangle_{5'} + |V
\rangle_{5'} -| H \rangle_{6'} + | V \rangle_{6'} ),~~~~~~~
\end{eqnarray}
where each photon is a superposed state in two spatial modes in both polarizations.

When we detect two different polarized photons in the upper $T_3$ gate and two different polarized photons in the lower one, we do not know whether the four photons detected in both $T_3$ gates come from hyper-entangled states or the extra input photons. For example, the upper $T_3$ gate succeeds upon detection of a horizontal photon in mode 3 and a vertical photon in mode 5. The photons could come from any two modes out of modes 3, 5, $3'$, and $5'$. According to Eq. (\ref{seven:eq09}),
the detection works on various input states like $|H \rangle_{3}
|V \rangle_{5}$, $|V \rangle_{3} |H \rangle_{5}$ ,$|H \rangle_{3'}
|V \rangle_{5'}$, and $|V \rangle_{3'} |H \rangle_{5'}$. If the detected photons were $|H \rangle_{3} |V \rangle_{5}$, $|V
\rangle_{3} |H \rangle_{5}$, the remaining state from Eq.
(\ref{seven:eq06}) is equal to the state in Eq.
(\ref{seven:eq07}). However, if the detected photons were $|H
\rangle_{3'} |V \rangle_{5'}$, and $|V \rangle_{3'} |H
\rangle_{5'}$, the lower circuit could be activated by $|H
\rangle_{4} |V \rangle_{6}$, $|V \rangle_{4} |H \rangle_{6}$ and the remaining state equals Eq.(\ref{seven:eq08}). The same logic can be applied the other way around between the upper and lower circuits. Thus, for the successful cases (two different polarized photons detected in each $T_3$ gate), the final state is equivalent to
\begin{eqnarray}
\label{seven:eq10} && {1 \over 2} ( | H \rangle_1 | H \rangle_7 +
| V \rangle_1 | V \rangle_7 + | H \rangle_2 | H \rangle_8 + | V
\rangle_2 | V \rangle_8 ) \,,~~~~~~~~
\end{eqnarray}
which is a superposition state of Eq. (\ref{seven:eq07}) and Eq.
(\ref{seven:eq08}). For this case, the total success probability is 1/64.

We now consider injecting a Bell pair in mode $3'$, $4'$,
$5'$, and $6'$ instead of two single photons such as
\begin{eqnarray}
\label{seven:eq11} |{\rm Ex_2}\rangle &=& {1 \over 2\sqrt{2}}
\big[ ( | H \rangle_{3'} + |V \rangle_{3'})(-| H \rangle_{5'} + |V
\rangle_{5'}) \nonumber \\&&~~~~~+ (| H \rangle_{4'} + |V
\rangle_{4'})(-| H \rangle_{6'} + | V \rangle_{6'} ) \big].
\end{eqnarray}
It can readily be seen that the same indistinguishability of $T_3$
gate operations occurs - in this case with total success probability 1/32.

Finally, the most efficient ancillary input to use is a HES
\begin{eqnarray}
\label{seven:eq12} |{\rm Ex_3}\rangle &=& {1 \over 2} ( | H
\rangle_{3'}| V \rangle_{5'} +| V \rangle_{3'}| H \rangle_{5'}
\nonumber \\&&~~~~~+ | H \rangle_{4'}| V \rangle_{6'}+ | V
\rangle_{4'}| H \rangle_{6'}).~~
\end{eqnarray}
In this case the total success probability is 1/16.

Interestingly, even in some failure cases, we still have a chance to have remnant entanglement between two photons in modes 1 (or 2) and 7 (or 8) of Figure \ref{8th}. Without the help of extra photons in the primed modes, the success probability is 1/2 to generate a Bell pair from two HESs through this circuit. If we use two extra photons, the possibility of obtaining some entanglement between 1 (or 2) and 7 (or 8) becomes higher than 1/2. This could possibly be useful for some hybrid qubit/quadbit cluster-state computing schemes. For example, we imagine a modified qubit cluster state possessing a HES at the edge and fuse two copies of this state on the HES side in circuit $K_3$. With an extra HES in the primed modes, a HES or a Bell pair is generated among modes 1,
2, 7, and 8 with overall probability 3/4.

We see therefore that we can use this Type2-like circuit to create a Bell pair from HESs. As shown in Figure \ref{8th}, we prepare two HESs with no extra photon. With probability 3/4 we achieve a Bell state. Although this seems perverse - destroying two HES's to create a Bell pair, it raises an interesting possibility of attaching systems which have the form of a HES at the end of the (qubit) cluster state. If we perform this fusion gate on two such photons, it appears that we could fuse the larger qubit cluster state with the probability 3/4.

\section{Summary of some resource tradeoffs}
\label{Sec:04}

\subsection{Difficulties of quantifying tradeoffs}
\label{Sec:0403}

A Bell pair can be created from 4 single photons with probability 1/4 (see Appendix \ref{Append03} for a proof - previously published results \cite{pan} suggested the success probability was 3/16). Such creation is \emph{ballistic} - the single photons are fired in, and (up to some local linear transformation) the desired Bell state is created 1 in 4 times. We could say that a Bell pair is ``worth'' 16 single photons on average - this indicates how much easier things will be if we have a deterministic source of Bell pairs. Now a trivial extension of this ballistic scheme can create a 3-photon GHZ state from 6 single photons with probability 1/32, and can create
(ballistically) a 4-photon GHZ state with probability 1/128 (see Table \ref{tab:MHES}). From this we might conclude a 3-photon GHZ state is worth 96 photons. However we can also create a 3-photon GHZ state by using a Type-I gate \cite{Browne05} and fusing two Bell pairs. The Type-I gate succeeds with probability 1/2, and each Bell pair is worth 16 photons, so this indicates the GHZ state is only worth 64 single photons. The difference, of course,
is that with the latter technique we would have to store the Bell pair, once created, in order for it to be available to combine with the second Bell pair. While the ability to postselect on successfully generated states (and then store them) lies at the heart of why it is we can turn exponentially decreasing probabilities into efficient methods for creating large entangled states, such storage is likely to present practical problems. (It is worth noting that the percolation techniques of
\cite{kieling06} ameliorate many of these issues).

Resource counting is made even messier by the following observation: Sometimes we may require the use of an ancillary entangled state within some larger ballistic circuit (a Bell pair say). One may think that we could replace this Bell pair by 4 single photons (as in Fig. \ref{NewS01} in Appendix
\ref{Append03}) to obtain a ballistic single photon scheme, and only take a hit of 1/4 in the overall success probability of the larger circuit. However the ballistic scheme presumes the ideal state is produced ``up to easily implementable linear optical transformations'' - and it is generally a smaller set of detection outcomes which yield the desired state for input into the larger circuit.

The final feature that makes resource counting difficult is the nature of failure outcomes: sometimes failed gates acting on suitably large input states still leave some of the systems in useful resource states. The potential for recycling (which also requires quantum memory) often greatly complicates the question of optimizing resource counting \cite{Gross}.
\begin{table}[b]
\centering
\begin{tabular}{|cccccc|c|c|}\hline
& & & Resource & & & Output & Probability \\ \cline{1-6}
SP & BP & 3GHZ & 4GHZ & HES &
3QdC & & \\
\hline 4 & & & & &
& BP & 1/4 \\
6 & & & & &
& 3GHZ & 1/32 \\
8 & & & & &
& 4GHZ & 1/128 \\
& 2 & & & &
& 3GHZ & 1/2 \\
& 1 & 1 & & &
& 4GHZ & 1/2 \\
\hline 8 & & & & &
& HES & 1/4096 \\
& 2 & & & &
& HES & 1/16 \\
& & & 1 & &
& HES & 3/16 \\
4 & & & & 2 & & 3QdC & 1/256 \\
6 & & & & 2 & & 4QdC & 1/1024 \\
6 & & & & 1 & 1 & 4QdC & 1/256 \\
2 & & & & & 2 & 4QdC & 1/64 \\
& 1 & & & & 2 & 4QdC & 1/32 \\
& & & & 1 & 2 & 4QdC & 1/16 \\
\hline
\end{tabular}
\caption{ Resource costs for multi-quadbit cluster states (SP =
single photon, BP = Bell pair, HES = hyper-entangled state, and QdC = quadbit cluster)} \label{tab:MHES}
\end{table}
\subsection{Resources for quadbit cluster states}
\label{Sec:0401}

As shown in Table \ref{tab:MHES}, various combinations of resources can be used to create any desired state. Without an entangled source ({i.e.} only single photon sources) one can generate a Bell pair from 4 single photons, a 3-photon GHZ state from 6 photons, and a 4-photon GHZ state from 8 photons. However,
using entangled sources, the desired many-photon state can be built with much higher probabilities.

In terms of quadbit cluster states, the counterpart of a Bell pair for qubit is a HES. So, to build a HES requires a source such as 8 single photons, two Bell pairs, or one 4-photon GHZ state. Based on the circuit $J_2$ the optimal probability is 3/16, obtained when using a 4-photon GHZ state.

The bottom of the table shows various ways of building multi-quadbit cluster states by proposed methods with the help of extra photons. We only have one method to build a 3 quadbit cluster state using circuit $K_1$, while several possible methods are available to create a 4 quadbit cluster state through circuits
$K_2$ and $K_3$. For the 4 quadbit cluster state, the success probability without a 3 quadbit cluster state is 1/1024 (using circuit $K_2$).

\section{Conclusion}
\label{Conc} We have initiated the study of building higher dimensional cluster states of photons. Although we have presented several ``modules'' within our constructions that we expect to be of generic use for LOQC using higher dimensional photonic states,
it is unclear to us whether the procedures we have outlined are close to the best possible. If they are, then there seems to be limited advantage in using higher dimensional cluster states built up from single photons from a strict resource counting perspective. It is possible, however, that in the future deterministic sources of hyper-entanglement become available.

Very recently, qubit one-way quantum computation using a hyper-entangled state (HES) has been demonstrated
\cite{new01,new02}. In these papers, a four-qubit cluster state is created from a HES generated by spontaneous parametric down-conversion. They assume that a photon's polarization and its spatial mode each define a qubit, so that destroying a single photon performs two single-qubit measurements simultaneously. Note that this is quite different to our proposal, where we use a single photon as a higher-dimensional quantum unit. An equivalence between these schemes arises for a 2-quadbit HES and a 4-qubit linear cluster state because
$2+2=2\cdot2$ \cite{new01,new02}.

\acknowledgements

We acknowledge useful discussions with Jens Eisert, David Gross and Konrad Kieling and thank the support of the US Army Research Office (W911NF-05-0397) and the UK EPSRC. This work was supported in part by the UK Engineering and Physical Sciences Research Council through their Quantum Information Processing Interdisciplinary Research Centre, and by the European Union through their networks SCALA and CONQUEST. J.J. was also supported by the Overseas Research Student Award Program.
\title{Foliated manifolds, algebraic $K$-theory, and a secondary invariant}

\begin{abstract}
We introduce a $\C/\Z$-valued invariant for foliated manifolds equipped with partially flat vector bundles.
Our main result is a formula for the invariant in terms of algebraic $K$-theory and a regulator.
\end{abstract}

\section{Introduction}

In this paper we introduce and analyse an invariant
$$\rho(M,\cF,\nabla^{I},s)\in \C/\Z$$
of an odd-dimensional closed spin manifold $M$ equipped with a real foliation $\cF$, a complex vector bundle with flat partial connection $\nabla^{I}$ in the direction of the foliation, and a stable framing $s$ of the foliation.
In order to define this number we must choose in addition a Riemannian metric on $M$, an extension of the partial connection $\nabla^{I}$ to a connection, and similarly, an extension of the canonical flat partial connection on the normal bundle $\cF^{\perp}$ of the foliation.

Without any further conditions the number $\rho(M,\cF,\nabla^{I},s)$ may depend non-trivially on the additional geometric choices. But if the codimension of the foliation $\cF$ is sufficiently small, namely if \begin{equation}\label{fefwfwefewfewfewewf}
2\mathrm{codim}(\cF)< \dim(M)\ ,
\end{equation} then
$\rho(M,\cF,\nabla^{I},s)$ does not depend on the additional choices.

\bigskip

The quickest way to define the invariant in Definition \ref{flwefjwefewff} is to use the integration in differential complex $K$-theory $\widehat{KU}^{*}$. Alternatively, $\rho(M,\cF,\nabla^{I},s)$ can also be expressed as a combination of $\eta$-invariants of twisted Dirac operators and correction terms involving integrals of characteristic forms and their transgressions, see Proposition \ref{fjwfkljfklfjwelkfjkewjfewlkf9798237982749237432243}.

\bigskip

The invariant $\rho(M,\cF,\nabla^{I},s)$ is very interesting since it combines various classical secondary invariants in spectral geometry, topology and foliation theory in one object.
We will reveal these relations by analysing special cases in Section \ref{jbjkhwekfewfwefewf89798}. We will observe that
$\rho(M,\cF,\nabla^{I},s)$ subsumes Adams' $e$-invariant for framed manifolds, the rho-invariant for Dirac operators twisted with flat bundles,
and classical invariants from foliation theory like the Godbillon-Vey invariant.

\bigskip

While the construction of the invariant $\rho(M,\cF,\nabla^{I},s)$ and the verification of its basic properties are not very deep and based on well-known methods from differential geometry and local index theory we think that its relation with algebraic $K$-theory is much less obvious.
In the present paper we reveal this relation in the special case of a foliated manifold of the form $$(M,\cF)=(P\times X,T_{\C}P\boxplus \{0\})\ .$$
Here $P$ is closed and stably framed. A complex vector bundle
$(V,\nabla^{I})$ with flat partial connection on $M$ provides an algebraic $K$-theory class of the ring of complex-valued smooth functions $C^{\infty}(X)$. We will write this class as $$f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})\in K_{p}(C^{\infty}(X))$$ with notation to be introduced in Section \ref{keklwfewfewfewf}, where
$p:=\dim(P)$. If $p >\dim(X)$ (this is exactly condition \eqref{fefwfwefewfewfewewf}), then we can define a regulator transformation
$${\tt reg}_{X}:K_{p}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-p-1}(X)\ ,$$ see Definition \ref{klfwefewfewfwf}.
If we now assume that $X$ is a closed spin manifold such that $p+\dim(X)$ is odd, then we have an integration
$$\pi_{!}^{o}:{\mathbf{ku}}\C/\Z^{-p-1}(X)\to {\mathbf{ku}}\C/\Z^{-p-\dim(X)-1}(*)\cong \C/\Z\ ,$$ where
$\pi:X\to *$ and $o$ is the orientation of $\pi$ for ${\mathbf{ku}}\C/\Z$ induced by the spin structure.
Our main result is Theorem \ref{flkfefwefwefewfef}:
\begin{theorem} \label{edhjlqkdjqwdqwdwqdw}
$$\rho(M,\cF,\nabla^{I},s)=\pi_{!}^{o}({\tt reg}_{X}(f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})))\ .$$
\end{theorem}
The proof of this theorem will be finished in Section \ref{keklwfewfewfewf}. It is based on the diagram \eqref{dewewdedewdewd2342343} which comprises various Riemann-Roch type squares for integration in algebraic and topological $K$-theory and their differential refinements.

\bigskip

In Section \ref{dkqwldqwdwqdwqdwqdwqd} we put the regulator ${\tt reg}_{X}$ into its natural general framework. We introduce the algebraic $K$-theory spectrum ${\mathbf{K}}(M,\cF)$ of a foliated manifold and the Hodge-filtered connective complex $K$-theory spectrum ${\mathbf{ku}}^{\flat}(M,\cF)$. The regulator ${\tt reg}_{X}$ used in the theorem above is then a special case of a regulator
$${\tt reg}:{\mathbf{K}}(M,\cF) \to {\mathbf{ku}}^{\flat}(M,\cF) \ .$$
In order to justify calling this map a regulator, consider a complex manifold $M $ as a real manifold with a complex foliation $\cF:=T^{0,1}M$. In this case ${\mathbf{K}}(M,T^{0,1}M)$ is the algebraic $K$-theory spectrum of $M$ defined using holomorphic vector bundles. Furthermore, the homotopy groups of
${\mathbf{ku}}^{\flat}(M,T^{0,1}M) $
are the ${\mathbf{ku}}$-theory analogs of the integral Deligne cohomology groups.
The regulator is an integral refinement of a version of Beilinson's regulator.
We will explain all this in detail in Section \ref{dkqwldqwdwqdwqdwqdwqd}.

\bigskip

In the first Sections we introduce basic definitions from the theory of foliated manifolds and characteristic classes.
The experienced reader could skip these sections in a first reading and use them as a reference for notation and normalization conventions. In Section \ref{jfkelwfwefewfewfewfewfwef} we give a quick introduction to the features of differential complex $K$-theory which are used in the construction of the invariant $\rho(M,\cF,\nabla^{I},s)$. The actual construction of this invariant will be given in Section \ref{flkwjefwkejfeflkewjflkefjelkfjelfjewlfewf9790}. As mentioned above, in the two subsequent Sections \ref{fkweflwefewfewfewfwfwfw} and \ref{jbjkhwekfewfwefewf89798} we provide a spectral theoretic interpretation of the invariant and relate it to various classical secondary invariants.

In Section \ref{keklwfewfewfewf} we develop the theory which is necessary to state and prove Theorem \ref{edhjlqkdjqwdqwdwqdw}. Finally, Section \ref{dkqwldqwdwqdwqdwqdwqd} is devoted to the algebraic $K$-theory of foliated manifolds and the regulator in general. This section has a substantial overlap with the work of Karoubi \cite{karoubi45}, \cite{karoubiast}, \cite{karoubi43}. In a certain sense it reformulates his constructions using the new technology of the $\infty$-categorical approach to $K$-theory and regulators developed in \cite{Bunke:2013ab}, \cite{Bunke:2012fk}, \cite{Bunke:2013aa}, \cite{Bunke:2014aa}.

\bigskip

{\em Acknowledgement: This work was partially supported by the SFB 1085 ``Higher Invariants'' of the DFG.}

\section{Foliated manifolds}\label{fewl453534535435}

We introduce the category of foliated manifolds $\Mf_{\C-fol}$ and its full subcategory $\Mf_{fol}$ of manifolds with real foliations.

\bigskip

Let $M$ be a smooth manifold and $T_{\C}M:=TM\otimes_{\R}\C$ be the complexified tangent bundle. A section of $ T_{\C}M$ is called a complex vector field. A complex vector field $X\in \Gamma(M,T_{\C}M)$ acts as a derivation $(X,f)\mapsto X(f)$ on the algebra $C^{\infty}(M)$ of complex-valued smooth functions $f$. For a pair of complex vector fields $X,Y $ we can consider the commutator $[X,Y]\in \Gamma(M,T_{\C}M)$. It is the unique complex vector field such that $$[X,Y](f)=X(Y(f))-Y(X(f))$$
for all $f\in C^{\infty}(M)$.

\bigskip

If $\cF\subseteq T_{\C}M$ is a subbundle, then we have an inclusion $\Gamma(M,\cF)\subseteq \Gamma(M,T_{\C}M)$ of spaces of sections.
\begin{ddd} A subbundle $\cF \subseteq T_{\C}M$ is called integrable if for any two sections $X,Y\in \Gamma(M,\cF )$ we also have $[X,Y]\in \Gamma(M,\cF)$.
\end{ddd}

\begin{ddd}
A foliation of a smooth manifold $M$ is an integrable subbundle $\cF \subset T_{\C}M$. A foliated manifold is a pair $(M,\cF )$ of a manifold and a foliation. \end{ddd}

Since $T_{\C}M$ is the complexification of the real vector bundle $TM$ we have a complex antilinear involution $X\mapsto \bar X$. For a subbundle $\cF\subset T_{\C}M$ we let $\bar \cF\subseteq T_{\C}M$ denote the subbundle obtained by applying this automorphism to the elements of $\cF$.
\begin{ddd}
A foliation is called real if $\overline{\cF}=\cF$.
In this case we define the real integrable subbundle $\cF_{\R}:=\cF\cap TM\subseteq TM$.
\end{ddd}

\bigskip

Let $f:M\to N$ be a smooth map between manifolds. Its differential is a map of bundles $$df :T_{\C}M\to f^{*}T_{\C}N$$ over $M$.

\begin{ddd}
We say that $f:(M,\cF )\to (M^{\prime},\cF^{\prime} )$ is a foliated map if its differential preserves the foliations in the sense that
$df (\cF )\subseteq f^{*}\cF^{\prime} $.
\end{ddd}

The composition of two foliated maps is again a foliated map.

\begin{ddd}
We let $\Mf_{\C-fol}$ denote the category of foliated manifolds and foliated maps.
We further let $\Mf_{fol}\subset \Mf_{\C-fol}$ be the full subcategory of foliated manifolds with real foliations.
\end{ddd}

Let $\Mf$ denote the category of smooth manifolds. Then we have functors
$$\Mf_{fol}\to \Mf_{\C-fol}\to \Mf\ ,$$
where the first is the inclusion of a full subcategory, and the second forgets the foliation.
The category of foliated manifolds has a cartesian product. It is given by
$$(M,\cF)\times (M^{\prime},\cF^{\prime})\cong (M\times M^{\prime},\cF\boxplus \cF^{\prime})\ .$$

\begin{ex}{\rm If $M$ is a complex manifold, then the subbundle $\cF :=T^{0,1}M\subseteq T_{\C}M$ is a complex foliation. Vice versa, a complex foliation $\cF $ with the additional property that $\cF \oplus \overline{\cF }\cong T_\C M$ equips $M$ with a complex structure such that $T^{0,1}M=\cF$.
Moreover, a foliated map between such foliated manifolds is the same as a holomorphic map.
}
\end{ex}

\begin{ex}\label{lkjwqdwdqwdqwdqwd}{\rm Every manifold $M$ has a minimal foliation $\cF_{min}:=\{0\}$ and a maximal foliation $\cF_{max} :=T_{\C}M$.
These foliations are real. If $M$ is equipped with the minimal foliation and $(M^{\prime},\cF^{\prime})$ is a foliated manifold, then every smooth map $M\to M^{\prime}$ is foliated.
Similarly, if $M^{\prime}$ is equipped with the maximal foliation, then for a foliated manifold $(M,\cF)$ every smooth map $M\to M^{\prime}$ is foliated.
}
\end{ex}

\begin{ex}\label{djqlwdqwdqwdq}{\rm Let $\pi:W\to B$ be a submersion. Then the complexification of the vertical bundle $T^{v}\pi:=\ker(d\pi)\subseteq TW$
defines a real foliation $\cF^{v}$ called the vertical foliation. The map $\pi$ is foliated for any choice of a foliation on $B$.
}
\end{ex}

\begin{ex}{\rm Let $\Gamma$ be a discrete group which acts freely and properly on a manifold $\tilde B$ from the right with quotient $B:=\tilde B/\Gamma$. Furthermore, let $X$ be a manifold with a left action of $\Gamma$. Then we consider the manifold
$M:=\tilde B\times_{\Gamma} X$. The vertical foliations $\tilde \cF^{v} $ and $\tilde \cF^{H} $ associated to the projections $\tilde B\times X\to \tilde B$ and $\tilde B\times X\to X$ descend to the quotient and define the vertical and horizontal foliations $\cF^{v} $ and $\cF^{H} $ on $M$. Note that $ \cF^{v} $ is the vertical foliation of the submersion $ M\to B$. We have $\cF^{H}\oplus \cF^{v}\cong T_{\C} M$. }
\end{ex}

\begin{ex}\label{dekldlqwdqwdqwd}{\rm

Let $(B,\cF)$ be a foliated manifold. We call a map $f:W\to B$ transversal to $\cF$ if for every $w\in W$ we have the relation $\cF_{f(w)}+df(TW_{w})=T_{f(w)}B$. If $f$ is transversal to $\cF$, then we can define a maximal foliation $f^{-1}\cF$ on $W$ such that $f$ becomes a foliated map. We must set $f^{-1}\cF:=df^{-1}(f^{*}\cF)$.

\bigskip

In particular, if $P$ is a manifold, then we can consider the projection $\pi:P\times B\to B$.
In this case, $ \pi^{-1}\cF=T_{\C}P\boxplus \cF$.

}
\end{ex}

\section{Filtrations on the de Rham complex}

A foliation on a manifold induces a decreasing multiplicative filtration of the de Rham complex.

\bigskip

We consider a foliated manifold $(M,\cF)$.
By $(\Omega(M),d)$ we denote the complexified de Rham complex of $M$.

\begin{ddd}\label{ilfjewlfwfewfewfewfwfw}
For $n,p\in \nat$ we define the subspace $$F^{p}\Omega^{n}(M)\subseteq \Omega^{n}(M)$$ of forms which vanish after the insertion of $n-p+1$ sections of $\cF$.
\end{ddd}

The family of these subspaces for all $p$ forms a decreasing filtration of $\Omega^{n}(M)$. More precisely we have the following chain of inclusions
$$ \Omega^{n}(M)=F^{0}\Omega^{n}(M) \supseteq F^{1}\Omega^{n}(M)\supseteq \dots \supseteq F^{\mathrm{codim}(\cF)}\Omega^{n}(M)\supseteq F^{\mathrm{codim}(\cF)+1}\Omega^{n}(M)=0\ .$$
Combining these filtrations for all $n$ together we get a decreasing filtration $(F^{p}\Omega(M))_{p\in \nat}$ of the graded commutative algebra $\Omega(M)$ which is multiplicative, i.e. the wedge product restricts to maps \begin{equation}\label{hjkqhdkdhwqkdwqd98789}
\wedge :F^{p}\Omega^{m}(M)\otimes F^{q}\Omega^{n}(M)\to F^{p+q}\Omega^{m+n}(M)\ .\end{equation}

These properties in fact hold for arbitrary subbundles $\cF$ of $T_{\C}M$. But as a consequence of the integrability of $\cF$, this filtration is also preserved by the de Rham differential, i.e.
$(F^{p}\Omega(M),d)$ is a subcomplex of $(\Omega(M),d)$ for every $p\in \nat$.

\bigskip

\begin{ddd}
For a foliated manifold $(M,\cF)$ we write $\Omega(M,\cF)$ for the de Rham complex $\Omega(M)$ considered as a filtered commutative differential graded algebra.
\end{ddd}

\begin{ex}{\rm If $M$ is a complex manifold, then the filtration on $\Omega(M,T^{0,1}M)$
is called the Hodge filtration. The associated spectral sequence is called the Hodge-de Rham spectral sequence.
}\end{ex}

\begin{ex}{\rm If $W\to B$ is a submersion and $\cF^{v}$ is the vertical foliation (Example \ref{djqlwdqwdqwdq}) on $W$, then the spectral sequence associated to the filtration of the de Rham complex $\Omega(W,\cF^{v})$ is the Leray-Serre spectral sequence.}\end{ex}

If $f:(M,\cF)\to (M^{\prime},\cF^{\prime})$ is a foliated map, then $f^{*}:\Omega(M^{\prime},\cF^{\prime})\to \Omega(M,\cF)$ is a morphism of filtered commutative differential graded algebras.

\bigskip

We let $\mathbf{CDGA}$ and $\mathbf{CDGA}^{filt}$ denote the categories of graded commutative differential graded algebras and filtered graded commutative differential graded algebras. For categories $\bC,\bD$ we can consider the category $${\mathbf{PSh}}_{\bD}(\bC):=\Fun(\bC^{op},\bD)$$ of $\bD$-valued presheaves on $\bC$.
We can formalize the properties of the filtered de Rham complex discussed above by saying that we have a presheaf
$$\Omega\in {\mathbf{PSh}}_{ \mathbf{CDGA}^{filt}}(\Mf_{\C-fol})\ .$$

\begin{ddd}\label{jkdjlqwdqwdqwd} We define the presheaves of graded commutative differential graded algebras $$DD^{-}\in {\mathbf{PSh}}_{\mathbf{CDGA}}(\Mf_{\C-fol})\ , \quad DD^{per}\in {\mathbf{PSh}}_{\mathbf{CDGA}}(\Mf)$$ by
$$DD^{-}(M,\cF) :=\prod_{p\in \Z} F^{p}\Omega (M)[2p]\ , \quad DD^{per}(M):=\prod_{p\in \Z} \Omega(M)[2p]\ .$$
\end{ddd}
We call $DD^{per}$ the periodic and $DD^{-}$ the negative de Rham complex.
Note that $DD^{-}$ has a decomposition into a product of components $$DD^{-}(M,\cF)\cong \prod_{p\in \Z} DD^{-}(M,\cF)(p)\ , \quad DD^{-}(M,\cF)(p) :=F^{p}\Omega (M)[2p]\ ,$$
and the product on $DD^{-}$ is induced by the wedge products of forms \eqref{hjkqhdkdhwqkdwqd98789} componentwise as $$DD^{-}(M,\cF)(p)\otimes DD^{-}(M,\cF)(q)\to DD^{-}(M,\cF)(p+q)\ .$$
The description of the product for $DD^{per}$ is similar.

\begin{rem}{\rm The cohomology of $DD^{per}(M)$ is the two-periodic de Rham cohomology of $M$.
It is the natural target of the Chern character from topological $K$-theory, see Definition \ref{ffwefwefewfewfwfw}.
The complex $DD^{-}(M,\cF)$ will receive characteristic forms for vector bundles with connections which are flat in the direction of the foliation, see Definition \ref{qldjqwldqwdqwdqwd}.
}\end{rem}

\section{Vector bundles with flat partial connections}\label{fhfjlwefkjfewfewfewfwf}

We introduce the notion of a vector bundle with a flat partial connection on a foliated manifold.

\bigskip

We consider a foliated manifold $(M,\cF)$.
Let $V\to M$ be a complex vector bundle.

\begin{ddd}\label{lwfwfewfewfewf}
A partial connection on $V$ is a map
$$\nabla:\Gamma(M,V)\to \Gamma(M,\cF^{*}\otimes V)$$ which satisfies the Leibniz rule.
\end{ddd}

\begin{rem}{\rm For sections $X\in \Gamma(M,\cF)$ and $\phi\in \Gamma(M,V)$ we write as usual $$\nabla_{X}\phi:=i_{X}(\nabla(\phi))
\in \Gamma(M,V)$$ for the evaluation of $\nabla\phi$ at $X$. With this notation the Leibniz rule has the form
$$\nabla_{X}(f\phi)=X(f)\phi+f\nabla_{X}\phi\ , \quad \forall f\in C^{\infty}(M)\ , \quad \forall\phi\in \Gamma(M,V)\ , \quad \forall X\in \Gamma(M,\cF)\ .$$
}\end{rem}

The foliation gives rise to a graded commutative differential graded algebra whose underlying commutative graded algebra is given by $\Omega(\cF):=\Gamma(M,\Lambda^{*}\cF^{*})$. Its differential $d^{\cF}$ is fixed by the prescription \begin{equation}\label{jdgjdhjdhk1j2hdkjhd2d2ud2du1d12d}
d^{\cF}:\Omega^{0}(\cF)\to \Omega^{1}(\cF)\ , \quad d^{\cF}(\phi):=d\phi_{|\cF}\ ,
\end{equation}
where $d$ is the usual de Rham differential and we use the identification $\Omega^{0}(\cF)= C^{\infty}(M)=\Omega^{0}(M)$.
We further write $\Omega(\cF,V):=\Gamma(M,\Lambda^{*}\cF^{*}\otimes V)$.
As in the case of usual connections we can extend $\nabla$ uniquely to a derivation on the $\Omega(\cF)$-module $\Omega(\cF,V)$.
Its curvature defined by $$R^{\nabla}:=\nabla^{2}\in \End( \Omega(\cF,V))$$ is $\Omega(\cF) $-linear and hence a two-form on $\cF$ with values in $\End(V)$, i.e. we have
$R^{\nabla}\in \Omega^{2}(\cF, \End(V))$.
\begin{ddd}\label{ijflkewfewfoieoiuoiuoiuoiuwef}
A partial connection $\nabla$ on $V$ is called flat if $R^{\nabla}=0$.
\end{ddd}

We now consider a foliated map $f:(M^{\prime},\cF^{\prime})\to (M,\cF)$.
If $V\to M$ is a vector bundle with a partial connection $\nabla$, then $f^{*}V$
has an induced partial connection $f^{*}\nabla$.
It is characterized by
\begin{equation}\label{gf433g34g3353454353453tr}
(f^{*}\nabla)_{X}(f^{*}\phi)=f^{*}\nabla_{df(X)}\phi\ , \quad \forall m^{\prime}\in M^{\prime}\ , \quad \forall X\in \cF^{\prime}_{m^{\prime}}\ , \quad \forall \phi\in \Gamma(M,V)\ .
\end{equation}
This formula has to be understood as an equality between elements in the fibre $(f^{*}V)_{m^{\prime}}$.
Because of the relation
$$f^{*}R^{\nabla}=R^{f^{*}\nabla}$$
the pull-back of a flat partial connection is again flat.

\begin{ex}{\rm If $M$ is a complex manifold with foliation $\cF=T^{0,1}M$, then a flat partial connection on a complex vector bundle $V$ is the same as a holomorphic structure. In this situation the flat partial connection is usually denoted by $\bar \partial$.
}
\end{ex}

\begin{ex}{\rm If $M$ is equipped with the minimal foliation, then a partial connection on a vector bundle is no additional data.
In the opposite case, where $M$ has the maximal foliation, a flat partial connection is the same as a flat connection.}
\end{ex}
\begin{ex}\label{fklwefjwefewf}{\rm Let $$\cF^{\perp}:=T_{\C}M/\cF$$ be the normal bundle of a foliation.
Then $\cF^{\perp}$ has a natural flat partial connection $\nabla^{\cF^{\perp}}$. It is given by
$$\nabla_{X}[Y]:= [\ [X,Y]\ ]\ ,$$ where $ X\in \Gamma(M,\cF)$ and the vector field $Y\in \Gamma(M,T_{\C}M)$ represents the section $[\ Y\ ] \in \Gamma(M,\cF^{\perp})$ of the normal bundle.}
\end{ex}

\begin{ex}\label{jfwlefjlwefewfewfewfw}{\rm Let $\pi:W\to B$ be a submersion and consider the vertical foliation $T^{v}\pi$ on $W$, see Example \ref{djqlwdqwdqwdq}.
If $V\to B$ is any vector bundle, then $\pi^{*}V\to W$ has a canonical flat partial connection $\nabla^{I}=\pi^{*}\nabla$, where $\nabla$ is the canonical flat partial connection on $V$ in the direction of the trivial foliation. In view of \eqref{gf433g34g3353454353453tr}
it is characterized by the condition that for $\phi\in \Gamma(B,V)$ we have $\nabla^{I}\pi^{*}\phi=0$.
}
\end{ex}

\section{Connections and characteristic forms}

We introduce the Chern character forms and Chern forms of complex vector bundles with connection. In the foliated case we discuss the consequences of the fact that the connection extends a flat partial connection.

\bigskip

Let $(M,\cF)$ be a foliated manifold and $(V,\nabla^{I})$ be a complex vector bundle with a flat partial connection.

\begin{ddd} \label{fjwelfwfewf42343242wf}
A connection $\nabla$ on $V$ is an extension of $\nabla^{I}$, if the relation
$\nabla_{X}\phi=\nabla^{I}_{X}\phi$ holds for all $\phi\in \Gamma(M,V)$ and $X\in \Gamma(M,\cF)$.
\end{ddd}

One can show that a flat partial connection admits extensions. Furthermore, the set of extensions of a flat partial connection is a torsor over the complex vector space
$$\Gamma(M, \cF^{\perp,*}\otimes \End(V))\ .$$

\begin{ex}{\rm A connection on $\cF^{\perp}$ which extends the partial flat connection $\nabla^{I,\cF^{\perp}}$ of Example \ref{fklwefjwefewf} is called a Bott connection.}
\end{ex}

\begin{ex}\label{fjewflewjflewfewf}{\rm Let $f:W\to B$ be a submersion and $\cF^{v}$ be the vertical foliation (Example \ref{djqlwdqwdqwdq}). If $V\to B$ is a complex vector bundle, then
$f^{*}V\to W$ has a canonical flat partial connection $\nabla^{I}$, see Example \ref{jfwlefjlwefewfewfewfw}. If $\nabla$ is any connection on $V$, then $f^{*}\nabla$ extends $\nabla^{I}$.

More generally, if $f:W\to B$ is transverse to a foliation $\cF$ on $B$ and $(V,\nabla^{I})$ is a vector bundle with flat partial connection on $B$, then $(f^{*}V,f^{*}\nabla^{I})$ is a vector bundle with flat partial connection on $W$, where $W$ is equipped with the foliation $f^{-1}\cF$, see Example \ref{dekldlqwdqwdqwd}. If $\nabla$ is a connection on $V$ extending $\nabla^{I}$, then $f^{*}\nabla $ is a connection on $f^{*}V$ extending $f^{*}\nabla^{I}$.

}
\end{ex}

If $\nabla$ is a connection on a complex vector bundle, then we consider its curvature
$$R^{\nabla}:=\nabla^{2}\in \Omega^{2}(M,\End(V))\ .$$ The Chern character form of $\nabla$ is the closed inhomogeneous complex-valued form
$$\ch_{0}(\nabla)+\ch_{2}(\nabla)+\ch_{4}(\nabla)+\dots:=\Tr \exp(-\frac{R^{\nabla}}{2\pi i})$$
with homogeneous components $\ch_{2i}(\nabla)\in \Omega^{2i}_{cl}(M)$. We will consider the Chern character form as a zero cycle in the periodic complex $DD^{per}(M)$.

\begin{ddd}\label{ffwefwefewfewfwfw}
We define
$$\ch(\nabla):=(\ch_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{per}(M))\ .$$
\end{ddd}

\begin{rem}\label{dkjqwlkdjqwlkjdlwqkdwdwqdqwd}{\rm In this remark we explain how the Chern character form behaves under complex conjugation and inserting adjoint connections. First of all,
if we choose a hermitean metric $h$ on the bundle $V$ with connection $\nabla$, then we can form the adjoint connection $\nabla^{*}$ which is characterized by the relation in $\Omega^{1}(M)$:
$$dh(\phi,\psi)=h(\nabla\phi,\psi)+h(\phi,\nabla^{*}\psi)\ , \quad \mbox{for all}\:\phi,\psi\in \Gamma(M,V)\ .$$ Applying $d$ to this equality again we get
$$0=h(R^{\nabla}\phi,\psi)+h(\phi, R^{\nabla^{*}}\psi)\ .$$ In view of the $2\pi i$-factor in the definition of the Chern character form this equality implies the relation \begin{equation}\label{ccewcwecweewwerewr}
\overline{\ch(\nabla)}=\ch(\nabla^{*})\ .
\end{equation}
The connection $\nabla$ is called unitary if $\nabla^{*}=\nabla$. In this case, the Chern character form $\ch(\nabla)$ is real.}
\end{rem}

Let $(V,\nabla^{I})$ be a complex vector bundle on $(M,\cF)$ with a flat partial connection $\nabla^{I}$. We further choose a hermitean metric $h$ on $V$.

\begin{ddd}\label{kdlqwdqwdqwdqd}
A flat partial connection $\nabla^{I}$ is called unitary (with respect to $h$), if $$ d^{\cF}h(\phi,\psi)=h(\nabla^{I}\phi,\psi)+h(\phi,\nabla^{I}\psi)$$ for all $\phi,\psi\in \Gamma(M,V)$.
\end{ddd}
See \eqref{jdgjdhjdhk1j2hdkjhd2d2ud2du1d12d} for $d^{\cF}$.

\begin{lem}\label{dhqwkdqkwddqwdwqdioipopioipoopi}
If $\nabla^{I}$ is unitary (with respect to $h$), then it admits a unitary extension $\nabla$.
\end{lem}
\proof Let $\nabla_{0}$ be some extension of $\nabla^{I}$. Then $\nabla^{*}_{0}$ is a second extension of $\nabla^{I}$ and
$$\nabla:=\frac{1}{2}(\nabla_{0}+\nabla_{0}^{*})$$
is a unitary extension of $\nabla^{I}$. \hB

\bigskip

Note that the filtration of $\Omega(M,\cF)$ introduced in Definition \ref{ilfjewlfwfewfewfewfwfw} induces a filtration on $\Omega(M,\End(V))$ which is compatible with the $\Omega(M,\cF)$-module structure.

\begin{lem}If $\nabla$ extends a flat partial connection $\nabla^{I}$ on $V$, then
$R^{\nabla}\in F^{1}\Omega^{2}(M,\End(V))$.
\end{lem}
\proof We have
$$R^{\nabla}_{|\Lambda^{2}\cF }=R^{\nabla^{I}}=0\ .$$ \hB This Lemma has consequences for the Chern character forms.

\begin{kor}\label{fwefwefewfewfewfewf2434ewfewfwefewf}
If $\nabla$ extends a flat partial connection on $V$, then
$$\ch_{2i}(\nabla)\in F^{i}\Omega^{2i}_{cl}(M,\cF)\ .$$
\end{kor}

\begin{ddd}\label{qldjqwldqwdqwdqwd}
If $\nabla$ extends a flat partial connection on $V$,
then we define
$$\ch^{-}(\nabla):=(\ch_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{-}(M,\cF))\ .$$
\end{ddd}

We let ${\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$ and ${\mathbf{Vect}}^{\nabla}(M)$ denote the symmetric monoidal categories (with respect to the direct sum) of pairs $(V,\nabla)$ of complex vector bundles with connection, where in the first case $\nabla$ extends a flat partial connection. In both cases morphisms are connection preserving vector bundle morphisms.

If $f:M^{\prime} \to M $ is a smooth map and $(V,\nabla)\in {\mathbf{Vect}}^{\nabla}(M)$, then we can define $(f^{*}V,f^{*}\nabla)\in {\mathbf{Vect}}^{\nabla}(M^{\prime})$ and have the relation \begin{equation}\label{wqdqwdqwdwqdwqwqdqd}
f^{*}\ch(\nabla)=\ch(f^{*}\nabla)\ .\end{equation}
Similarly, if $f:(M^{\prime},\cF^{\prime})\to (M,\cF)$ is a foliated map and $(V,\nabla)\in {\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$, then
$(f^{*}V,f^{*}\nabla)\in {\mathbf{Vect}}^{\flat,\nabla}(M^{\prime},\cF^{\prime})$ and we have the relation
\begin{equation}\label{wqdqwdqwdwqdwqwqdqd1}
\ch^{-}(f^{*}\nabla)=f^{*}\ch^{-}(\nabla)\ .\end{equation}
Thus the Chern character forms are characteristic forms. In addition, they are additive, i.e. the Chern character form of a direct sum is the sum of the Chern character forms of the summands.
These properties will be important for the construction of the regulator in Subsection \ref{kfjwelfewfewfewfewfewfewfe}.

\bigskip

Let $(V,\nabla)$ be a complex vector bundle with connection on a manifold $M$. Then we define the Chern forms $$c_{2i}(\nabla)\in \Omega^{2i}(M)$$ of $\nabla$ as the homogeneous components of the following inhomogeneous form
$$1-c_{1}(\nabla)+c_{2}(\nabla)-\dots=\det(1-\frac{R^{\nabla}}{2\pi i})\ .$$
The Chern forms can be expressed as homogeneous polynomials in the Chern character forms. In particular, if $(M,\cF)$ is a foliated manifold and $\nabla$ extends a flat partial connection, then we have \begin{equation}\label{jlkjlwejflkewjfewf89798}
c_{2i}(\nabla)\in F^{i}\Omega^{2i}_{cl}(M,\cF)\ .\end{equation}

\section{Characteristic forms of real foliations}

We introduce characteristic forms of real vector bundles on real foliated manifolds. We in particular discuss the $\hA$-form.

\bigskip

We consider a foliated manifold $(M,\cF)$. If $V\to M$ is a real vector bundle on a real foliated manifold $(M,\cF)$, then in analogy with Definitions \ref{lwfwfewfewfewf} and \ref{ijflkewfewfoieoiuoiuoiuoiuwef} we have the notion of a flat partial connection $$\nabla^{I}:\Gamma(M,V)\to \Gamma(M,\cF_{\R}^{*}\otimes V)$$ on $V$.
We furthermore have the notion of a connection $\nabla$ on $V$ extending $\nabla^{I}$ (compare with Definition \ref{fjwelfwfewf42343242wf}).

\bigskip

Let $V\to M$ be a real vector bundle with connection $\nabla$. We let $\nabla_{\C}$ denote the induced connection on the complexification $V\otimes \C$.
We define the Pontrjagin forms of $\nabla$ by $$p_{i}(\nabla):=(-1)^{i}c_{2i}(\nabla_{\C})\in \Omega_{cl}^{4i}(M)\ .$$
If $(M,\cF)$ is real foliated and $\nabla$ extends a flat partial connection, then by \eqref{jlkjlwejflkewjfewf89798} we have \begin{equation}\label{wwejoijiofwef9789}
p_{i}(\nabla)\in F^{2i}\Omega^{4i}_{cl}(M,\cF)\ .\end{equation}

\bigskip

In order to define the $\hA$-form we consider the symmetric polynomials $p_{i}$ of degree $4i$ in variables $x_{\ell}$ of degree $2$ defined by the relation
$$1+p_{1} +p_{2} +\dots=\prod_{\ell} (1-x_{\ell}^{2})\ .$$
We define homogeneous polynomials $\hA_{4k} $ for $k\ge 1$ in the variables $p_{i}$ by the relation
$$ 1+\hA_{4} +\hA_{8} +\dots=\prod_{\ell}\frac{\frac{x_{\ell}}{2}}{\sinh(\frac{x_{\ell}}{2})}\ .$$
Then the components of the $\hA$-form are defined by
$$\hA_{4i}(\nabla):=\hA_{4i}(p_{1}(\nabla),p_{2}(\nabla),\dots) \in \Omega_{cl}^{4i}(M)\ .$$
If $(M,\cF)$ is real foliated and the connection $\nabla$ extends a flat partial connection, then by \eqref{wwejoijiofwef9789} we have
$$\hA_{4i}(\nabla)\in F^{2i}\Omega^{4i}_{cl}(M,\cF)\ .$$

\bigskip

We again want to consider the $\hA$-form as a zero cycle of $DD^{per}(M)$, or of $DD^{-}(M,\cF)$ in the foliated case. In order to simplify the notation we set $\hA_{2i}(\nabla):=0$ if $i$ is odd.

\begin{ddd} If $\nabla$ is a connection on a real vector bundle on $M$, then we define
$$\hA(\nabla):=(\hA_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{per}(M))\ .$$
If $(M,\cF)$ is a real foliated manifold and $\nabla$ extends a flat partial connection, then we define
$$\hA^{-}(\nabla):=(\hA_{2p}(\nabla))_{p\in \Z}\in Z^{0}(DD^{-}(M,\cF))\ .$$
\end{ddd}

The $\hA$-form is multiplicative, i.e. the $\hA$-form of a direct sum of connections is the product of $\hA$-forms. Furthermore, the $\hA$-form of a trivial connection is the multiplicative unit.

\begin{ex}\label{kjlwefwefwef}{\rm If $(M,\cF)$ is a real foliated manifold, then the real normal bundle $$\cF_{\R}^{\perp}:=TM/\cF_{\R}$$ of the foliation has a flat partial connection $\nabla^{I,\cF_{\R}^{\perp}}$ defined similarly as in Example \ref{fklwefjwefewf}. If we choose a connection $\nabla^{\cF_{\R}^{\perp}}$ extending $\nabla^{I,\cF_{\R}^{\perp}}$, then we obtain a cycle
$$\hA^{-}(\nabla^{\cF_{\R}^{\perp}})\in Z^{0}(DD^{-}(M,\cF))\ .$$
}
\end{ex}

\section{Transgression}

We introduce the transgression of characteristic forms and discuss its basic properties. In the case of foliations we discuss the consequences of the fact that the connections extend a fixed flat partial connection.

\bigskip

We consider the unit interval $I:=[0,1]$ with coordinate $t$.
For $i=0,1$ let $\iota_{i}:*\to I$ be the inclusions of the endpoints of the interval.
Let $M$ be a smooth manifold and $V\to M$ be a vector bundle.
Given two connections $\nabla_{0}$ and $\nabla_{1}$ we can consider a connection $\tilde \nabla$ on $\pi^{*}V\to I\times M$ such that $(\iota_{i}\times \id_{M})^{*}\tilde \nabla=\nabla_{i}$ for $i=0,1$. For example we could take the linear interpolation $t\pi^{*}\nabla_{1}+(1-t)\pi^{*}\nabla_{0}$.
\bigskip

The integration of forms along the fibre of $\pi:I\times M\to M$ is a map of graded vector spaces
$$\int_{I\times M/M}:\Omega(I\times M)\to \Omega(M)[-1]\ .$$ It induces a map
$$\int_{I\times M/M} :DD^{per}(I\times M)\to DD^{per}(M)[-1]\ .$$ Since the interval $I$ has a non-empty boundary the integration is not a morphism of complexes. In fact,
by Stokes' theorem we have the relation
\begin{equation}\label{dqwdwqdwqdqdw}
(\iota_{1}\times \id_{M})^{*}-(\iota_{0}\times \id_{M})^{*}=d\circ \int_{I\times M/M}+\int_{I\times M/M}\circ d\ .
\end{equation} \begin{ddd}
The transgression of the Chern character form is defined by
$$\widetilde{\ch}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M} \ch(\tilde \nabla)\in DD^{per}(M)^{-1}/\im(d)\ .$$
\end{ddd}
From \eqref{dqwdwqdwqdqdw} and the facts that the Chern character forms are closed and natural (see \eqref{wqdqwdqwdwqdwqwqdqd})
we immediately get the identity \begin{equation}\label{hfjwkjehfkjwehfewfewf897987}
d \widetilde{\ch}(\nabla_{1},\nabla_{0})=\ch(\nabla_{1})-\ch(\nabla_{0})\ .
\end{equation}
One can check that transgression is independent of the choice of the connection $\tilde \nabla$ interpolating between $\nabla_{0}$
and $\nabla_{1}$. At this point it is relevant that we consider the transgression as a class modulo exact forms.
Furthermore, we have the identities \begin{equation}\label{fewfwefwefwefew32434234234}
\widetilde{\ch}(\nabla_{1},\nabla_{0})+\widetilde{\ch}(\nabla_{2},\nabla_{1})+\widetilde{\ch}(\nabla_{0},\nabla_{2})=0\ , \quad \widetilde{\ch}(\nabla_{1},\nabla_{0})+\widetilde{\ch}(\nabla_{0},\nabla_{1})=0\ .
\end{equation}
In order to see e.g. the first equality in \eqref{fewfwefwefwefew32434234234} one can integrate the Chern character form of an interpolation between the three connections over a two-simplex.
\begin{rem}{\rm If we choose a hermitean metric on $V$, then we can form the adjoint connections. From
\eqref{ccewcwecweewwerewr} we get the relation \begin{equation}\label{qwdqwdqwdwqdqwdwqdqwdqwd}
\overline{\widetilde{\ch}(\nabla_{1},\nabla_{0})} =\widetilde{\ch}(\nabla^{*}_{1},\nabla^{*}_{0})\ .
\end{equation}
}\end{rem}

\bigskip

We now assume that $(M,\cF)$ is foliated and that the connections $\nabla_{i}$ for $i=0,1$ extend the same flat partial connection $\nabla^{I}$.
Then we can equip $I\times M$ with the foliation $T_{\C}I\boxplus \cF$ introduced in Example \ref{dekldlqwdqwdqwd}.
We can furthermore find an interpolation $\tilde \nabla$ which extends the flat partial connection $\pi^{*}\nabla^{I}$, e.g. the linear interpolation.

\bigskip

We now observe that the integration preserves the filtration, i.e.
$$\int_{I\times M/M}:F^{p}\Omega^{k}(I\times M,T_{\C}I\boxplus \cF)\to F^{p}\Omega^{k-1}(M,\cF) \ .$$
Hence we get an induced map
$$\int_{I\times M/M}:DD^{-}(I\times M,T_{\C}I\boxplus \cF)\to DD^{-}(M,\cF)[-1]\ .$$

\begin{ddd} Let $(M,\cF)$ be a foliated manifold and $(V,\nabla^{I})$ be a complex vector bundle with a flat partial connection on $M$. If $\nabla_{0}$ and $\nabla_{1}$ are two connections on $V$ extending $\nabla^{I}$, then we define transgression of the Chern character form by
$$\widetilde{\ch^{-}}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M} \ch^{-}(\tilde \nabla)\in DD^{-}(M,\cF)^{-1}/\im(d)\ .$$
\end{ddd}

Note again, that $\widetilde{\ch^{-}}(\nabla_{1},\nabla_{0})$ is independent of the choice of the interpolation $\tilde \nabla$.

\begin{ex}{\rm We consider a foliated manifold $(M,\cF)$ and a complex vector bundle $V \to M$.
If $p\in \nat$ is such that $p>\mathrm{codim}(\cF)$, then we have $F^{p}\Omega^{2p}(M)=0$.

\bigskip

Assume that $\nabla^{I}_{0}$ and $\nabla^{I}_{1}$ are two flat partial connections on a complex vector bundle $V$ and let $\nabla_{0}$, $\nabla_{1}$ be corresponding extensions. If $p>\mathrm{codim}(\cF)$, then
$\widetilde{\ch}_{2p}(\nabla_{1},\nabla_{0})$ is closed since by \eqref{dqwdwqdwqdqdw} and Corollary \ref{fwefwefewfewfewfewf2434ewfewfwefewf} its differential belongs to $F^{p}\Omega^{2p}(M)=0$.
Its cohomology class does not depend on the choice of the extensions $\nabla_{1}$ and $\nabla_{0}$.
We therefore get a secondary characteristic class $$c(\nabla_{1}^{I},\nabla_{0}^{I}):=\widetilde{\ch}_{2p}(\nabla_{1},\nabla_{0})\in H^{2p-1}(M;\C)\ .$$
}\end{ex}

\begin{ex}{\rm The following construction generalizes the Kamber-Tondeur classes (introduced in this form in \cite{MR1303026}) to the foliated case.
Let $\nabla^{I}$ be a flat partial connection. If we choose a hermitean metric $h^{V}$ on $V$, then we can define an adjoint flat partial connection $\nabla^{I,*}$.
It is characterized by the relation
$$d^{\cF}h(\phi,\psi)=h(\nabla^{I}\phi,\psi)+h(\phi,\nabla^{I,*}\psi)\ , \quad \phi,\psi\in \Gamma_{c}(M,V)\ .$$

Let $\nabla$ be an extension of $\nabla^{I}$. Then $\nabla^{*}$ extends $\nabla^{I,*}$.
We consider the form $$\widetilde{\ch}_{2p}(\nabla,\nabla^{*})\in F^{p}\Omega^{2p-1}(M,\cF)/\im(d)\ .$$
By \eqref{qwdqwdqwdwqdqwdwqdqwdqwd} and \eqref{fewfwefwefwefew32434234234} we have the relation
\begin{equation}\label{dewdwedwedewded}
\overline{\widetilde{\ch}_{2p}(\nabla,\nabla^{*})}= \widetilde{\ch}_{2p}(\nabla^{*},\nabla)= -\widetilde{\ch}_{2p}(\nabla,\nabla^{*})\ ,
\end{equation} i.e. the form $\widetilde{\ch}_{2p}(\nabla,\nabla^{*})$ is imaginary.

\bigskip

For $p>\mathrm{codim}(\cF)$ the class
$$c_{2p-1}(\nabla^{I}):=c(\nabla^{I},\nabla^{I,*})\in H^{2p-1}(M;\C) $$
does not depend on the choice of the hermitean metric.
By \eqref{dewdwedwedewded} it is imaginary, i.e. it belongs to the real subspace $iH^{2p-1}(M;\R)\subseteq H^{2p-1}(M;\C) $.
\bigskip

We can apply this construction to the bundle $\cF^{\perp}$ with its canonical flat partial connection $\nabla^{I,\cF^{\perp}}$, see Example \ref{fklwefjwefewf}. The class \begin{equation}\label{kfkwejwlkefjlwekfjewfewfopipoi234}
c_{2p-1}(\nabla^{I,\cF^{\perp}})\in H^{2p-1}(M;\C)
\end{equation} is closely related to the Godbillon-Vey class of the foliation. If the foliation $\cF$ is real, then we can explain the place of this invariant in the classification of characteristic classes for foliations. See Remark \ref{ergegojerglerogergeg}, in particular \eqref{ewfwefewfwf432342344123}.

}
\end{ex}

Let $V\to M$ be a real bundle. Then using a similar notation as above we can define
\begin{equation}\label{nkdqlwdwqddwqdj09809}
\widetilde{\hA}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M}\hA(\tilde \nabla)\in DD^{per}(M)^{-1}/\im(d)\ .\end{equation}
We have
\begin{equation}\label{1e1h2ekj12ej12ke21hek2eh}d \widetilde{\hA}(\nabla_{1},\nabla_{0})=\hA(\nabla_{1})-\hA(\nabla_{0})\ .\end{equation} The transgression is independent of the choice of the connection $\tilde \nabla$. Furthermore, we have the identities $$\widetilde{\hA}(\nabla_{1},\nabla_{0})+\widetilde{\hA}(\nabla_{2},\nabla_{1})+\widetilde{\hA}(\nabla_{0},\nabla_{2})=0\ , \quad \widetilde{\hA}(\nabla_{1},\nabla_{0})+\widetilde{\hA}(\nabla_{0},\nabla_{1})=0\ . $$

Finally, if $\nabla_{1}$ and $\nabla_{0}$ extend the same flat partial connection, then we can define
$$\widetilde{\hA^{-}}(\nabla_{1},\nabla_{0}):=\int_{I\times M/M}\hA^{-}(\tilde \nabla)\in DD^{-}(M,\cF)^{-1}/\im(d)\ .$$

We now assume that the foliation $\cF$ is real.
Let $g^{TM}$ be a Riemannian metric on $M$.
We get a decomposition $TM\cong \cF_{\R}\oplus \cF_{\R}^{\perp}$ and choose a connection $\nabla^{\cF_{\R}^{\perp}}$ extending $\nabla^{I,\cF_{\R}^{\perp}}$. We further assume that $\cF_{\R}$ has a stable framing $s $. Let $\nabla^{\cF_{\R},triv}$ be the associated stable trivial connection on $\cF_{\R}$. We have forms
$$\widetilde{\hA}(\nabla^{LC},\nabla^{\cF_{\R},triv}\oplus \nabla^{\cF_{\R}^{\perp}})\in DD^{per,-1}(M) \ , \quad \hA(\nabla^{\cF_{\R},triv}\oplus \nabla^{\cF_{\R}^{\perp}})=\hA(\nabla^{\cF_{\R}^{\perp}})\in DD^{-,0}_{cl}(M)\ .$$

\section{Differential $K$-theory}\label{jfkelwfwefewfewfewfewfwef}

In this section we recall some basic features of the Hopkins-Singer version of differential complex $K$-theory.

\bigskip

References for the following material are the foundational paper by Hopkins-Singer \cite{MR2192936}, but also \cite{MR2732065} and \cite{MR2664467}. For differential orientations and Umkehr maps we refer to \cite{MR2664467}, \cite{Freed-Lott}, and \cite{2012arXiv1208.3961B}.

\subsection{Basic structures}

We describe the differential extension $\widehat{KU}^{*}$ of the generalized cohomology theory $\mathbf{KU}^{*}$. Here $ \mathbf{KU}^{*}$ is the periodic topological complex $K$-theory which is represented by the spectrum $\mathbf{KU}$.
For every $p\in \Z$ we have a contravariant functor
$$\widehat{KU}^{p}:\Mf^{op}\longrightarrow \Ab $$
from smooth manifolds to abelian groups. This functor is connected with periodic topological complex $K$-theory $\mathbf{KU}^{*}$ via a transformation $$I:\widehat{KU}^{p}\to \mathbf{KU}^{p}$$ of abelian group valued functors. The transformation $I$
maps differential $K$-theory classes to their underlying topological $K$-theory classes.
Furthermore, differential $K$-theory is connected with differential forms through natural transformations $R$ and $a$.
The curvature $R$ is a natural transformation
$$ R:\widehat{KU}^{p}\to Z^{p}(DD^{per}(M)) \ .$$
In particular, if $x\in \widehat{KU}^{p}(M)$, then $R(x)\in Z^{p}(DD^{per}(M))$
is a differential form representing the Chern character of the underlying topological class $I(x)$.
The transformation $$a: DD^{per,p-1}/\im(d) \to \widehat{KU}^{p}$$ encodes the secondary information contained in differential $K$-theory classes.
All these structures and their compatibilities are nicely encoded in the following commutative diagram, also called the differential cohomology diagram \cite{MR2365651}:
$$\xymatrix{& DD^{per,p-1} /\im(d)\ar[dr]^{a}\ar[rr]^{d}&&Z^{p}(DD^{per} )\ar[dr]^{Rham}&\\H^{p-1}(DD^{per} ) \ar[ur]\ar[dr]&&\widehat{KU}^{p} \ar[ur]_{R}\ar[dr]^{I}&&H^{p}(DD^{per} ) \\&\mathbf{KU}\C/\Z^{p-1} \ar[ur]\ar[rr]^{Bockstein}&&\mathbf{KU}^{p} \ar[ur]^{\ch}&}\ .$$
Its upper and lower parts are segments of long exact sequences, and the diagonals are exact at the center.

\bigskip

The flat part of differential $K$-theory is defined as the kernel of the curvature transformation $R$: $$\widehat{KU}^{p}_{flat}:=\ker(R:\widehat{KU}^{p}\to Z^{p}(DD^{per}))\ .$$ It is canonically isomorphic to $\C/\Z$-$K$-theory (with a shift):\begin{equation}\label{eq700hhh}\widehat{KU}^{p}_{flat} \cong \mathbf{KU} \C/\Z^{p-1} \ .\end{equation}

\bigskip

The sequence
\begin{equation}\label{eq700}\mathbf{KU}^{p-1}\stackrel{\ch}{\to} DD^{per,p-1}/\im(d)\stackrel{a}{\to} \widehat{KU}^{p}\stackrel{I}{\to} \mathbf{KU}^{p}\to 0\end{equation}
is exact.

\bigskip

The differential $K$-theory of a point is given by
\begin{equation}\label{djhqkjwdqwdq9879}\widehat{KU}^{p}(*)\cong \left\{\begin{array}{cc} \Z&\mbox{$p$ is even}\\
\C/\Z&\mbox{$p$ is odd}\end{array}\right.\ .\end{equation}

\bigskip

Differential $K$-theory is not homotopy invariant. The deviation from homotopy invariance is quantified by the homotopy formula. If $\hat x\in \widehat{KU}^{p}([0,1]\times M)$, then the homotopy formula states that
\begin{equation}\label{homotopyformula}(\iota_{1}\times \id_{M})^{*}\hat x-(\iota_{0}\times \id_{M})^{*}\hat x=a(\int_{[0,1]\times M/M} R(\hat x))\ . \end{equation}

\subsection{The cycle map}

A complex vector bundle with connection $(V,\nabla)$ on a manifold $M$ gives rise to a differential $K$-theory class
$[V,\nabla]\in \widehat{KU}^{0}(M)$ such that \begin{equation}\label{dkdqwdwqdqwdwqdkl8997}R([V,\nabla])=\ch(\nabla)\in Z^{0}(DD^{per}(M))\ , \quad I([V,\nabla])=[V]\in \mathbf{KU}^{0}(M)\ . \end{equation}

Let $M\mapsto \pi_{0}({\mathbf{Vect}}^{\nabla}(M))$ denote the contravariant functor which associates to the manifold $M$ the commutative monoid (induced by the direct sum) of isomorphism classes of pairs $(V,\nabla)$ of vector bundles with connection on $M$ and to a smooth map between manifolds $f:M\to M^{\prime}$ the pull-back $f^{*}$.
The additive natural transformation
$$[\dots]:\pi_{0}({\mathbf{Vect}}^{\nabla})\to \widehat{KU}^{0}$$ is called the cycle map and fits into the commuting diagram
$$\xymatrix{&Z^{0}(DD^{per})\\\pi_{0}({\mathbf{Vect}}^{\nabla})\ar[r]^{[\dots]}\ar[ur]^{[V,\nabla]\mapsto \ch(\nabla)}\ar[dr]_{[V,\nabla]\mapsto[V]}&\widehat{KU}^{0}\ar[u]^{R}\ar[d]_{I}\\&\mathbf{KU}^{0}}\ .$$

For compact manifolds $M$ the cycle map is known to be surjective \cite{MR2732065}. Assume that $\nabla_{0}$ and $ \nabla_{1}$ are two connections on the bundle $V$. Then as a consequence of the homotopy formula \eqref{homotopyformula} we have
\begin{equation}\label{kdqklwdjqwlkdjqlwdkwqd89798}[V,\nabla_{1}]-[V,\nabla_{0}]=a(\widetilde{\ch}(\nabla_{1},\nabla_{0}))\ .\end{equation}

\subsection{Differential orientations and Umkehr maps}\label{wlekfjwelfjewlfwef123}

Let $\pi:W\to B$ be a proper submersion such that the vertical bundle $T^{v}\pi$ has a $Spin^{c}$-structure. Then $\pi$ is equipped with an orientation $o$ (called the Atiyah-Bott-Shapiro orientation \cite{MR0167985}) for the cohomology theory $\mathbf{KU}^{*}$ and admits an Umkehr map
$$\pi^{o}_{!}:\mathbf{KU}^{p}(W)\to \mathbf{KU}^{p-d}(B)\ ,$$ where $d:=\dim(W)-\dim(B)$ is the dimension of the fibre (assume for simplicity that $B$ is connected). Since $\mathbf{KU}\C/\Z$ is a $\mathbf{KU}$-module spectrum we also have an integration
\begin{equation}\label{hjkfhjkhkjhkjefwefefwe7987}
\pi^{o}_{!}:\mathbf{KU}\C/\Z^{p}(W)\to \mathbf{KU}\C/\Z^{p-d}(B)\ .
\end{equation}

The $\mathbf{KU}$-orientation $o$ determines a cohomology class
$$\hA(o)\in H^{0}(DD^{per}(W))$$ such that the Riemann-Roch theorem holds true:
$$\xymatrix{\mathbf{KU}^{p}(W)\ar[d]^{\pi_{!}^{o}}\ar[r]^{\ch}&H^{p}(DD^{per}(W))\ar[d]^{\int_{W/B}\hA(o)\cup \dots}\\ \mathbf{KU}^{p-d}(B)\ar[r]^{\ch}&H^{p-d}(DD^{per}(B))}$$

Differential refinements of $\mathbf{KU}$-orientations have been studied in detail in \cite{MR2664467}, \cite{Freed-Lott}, see also
\cite{2012arXiv1208.3961B} for a more homotopy-theoretic approach.
In order to refine the $\mathbf{KU}$-orientation $o$ to a $\widehat{KU}$-orientation $\hat o$ we must choose additional geometric structures. First of all we choose a metric on the vertical tangent bundle $T^{v}\pi$ and a horizontal distribution $T^{h}\pi$, i.e. a complement of the vertical bundle in $TW$. These structures induce a vertical Levi-Civita connection $\nabla^{T^{v}\pi}$, see e.g. \cite[Prop. 10.2]{MR2273508}. In order to fix $\hat o$ we must further choose a $Spin^{c}$-extension $\tilde \nabla^{T^{v}\pi}$ of $\nabla^{T^{v}\pi}$. The $\widehat{KU}$-orientation $\hat o$
gives rise to an Umkehr map (see \cite[3.2.3]{MR2664467})
$$\pi^{\hat o}_{!}:\widehat{KU}^{p}(W)\to \widehat{KU}^{p-d}(B)\ .$$ The differential orientation further provides a form $$\hA(\hat o)\in Z^{0}(DD^{per}(W))$$ representing the class $\hA(o)$. The Umkehr map in $\widehat{KU}$-theory fits into the following commutative diagram \begin{equation}\label{r23r23r23r23r23r235435346546}
\hspace{-1cm}\xymatrix{\mathbf{KU}\C/\Z^{p-1}(W)\ar[d]^{\pi_{!}^{o}}\ar@/^1cm/[rr]&DD^{per}(W)^{p-1}/\im(d)\ar[d]^{\int_{W/B}\hA(\hat o)\wedge \dots}\ar[r]^(0.6){a}&\widehat{KU}^{p}(W)\ar[d]^{\pi_{!}^{\hat o}}\ar[r]^{R}\ar@/^1cm/[rr]^{I}&Z^{p}(DD^{per}(W))\ar[d]^{\int_{W/B}\hA(\hat o)\cup \dots}&KU^{p}(W)\ar[d]^{\pi_{!}^{o}}\\\mathbf{KU}\C/\Z^{p-d-1}(B)\ar@/_1cm/[rr]&DD^{per}(B)^{p-d-1}/\im(d)\ar[r]^(0.6){a}&\widehat{KU}^{p-d}(B)\ar[r]\ar@/_1cm/[rr]_{I}&Z^{p-d}(DD^{per}(B))&KU^{p-d}(B)}
\end{equation}

The set of $\widehat{KU}$-orientations refining an underlying topological $\mathbf{KU}$-orientation $o$ is a torsor over $DD^{per}(W)^{-1}/\im(d)$ such that for $\alpha\in DD^{per}(W)^{-1}/\im(d)$ we have
\begin{equation}\label{lwefwwfewfewfefewfewfewffwe}
\hA(\hat o+\alpha)=\hA(\hat o)+d\alpha .\end{equation}

In the case of the map $\pi:M\to *$ from a closed manifold to a point we have the rules (all following from \cite[(17)]{MR2664467})
\begin{equation}\label{kdklqwjdlqwjdwldqwdqwdqd}\pi_{!}^{\hat o+\alpha}(x)=\pi_{!}^{\hat o}(x)+[\int_{M}\alpha\wedge R(x)]_{\C/\Z}\ , \quad \pi_{!}^{\hat o}(a(\omega))=[\int_{M} \hA(\hat o)\wedge \omega]_{\C/\Z} \ .\end{equation}

If $\pi=\pi_{1}\circ \pi_{0}$ is a composition of proper submersions and $\hat o_{i}$ are $\widehat{KU}$-orientations of $\pi_{i}$, then we can define a composed orientation $\hat o=\hat o_{1}\circ \hat o_{0}$ for $\pi$ in a natural way such that
\begin{equation}\label{e23e23e3e23e32e32e2}
\pi_{!}^{\hat o}=\pi^{\hat o_{1}}_{1,!}\circ \pi_{0,!}^{\hat o_{0}}\ .
\end{equation}
If
$$\xymatrix{W^{\prime}\ar[d]^{\pi^{\prime}}\ar[r]^{g}&W\ar[d]^{\pi}\\B^{\prime}\ar[r]^{f}&B}$$
is a cartesian diagram and $\hat o$ is a differential orientation of $\pi$, then we can define a $\widehat{KU}$-orientation $\hat o^{\prime}$ of $\pi^{\prime}$ such that
$$\pi^{\prime,\hat o^{\prime}}_{!}\circ g^{*}=f^{*}\circ \pi_{!}^{\hat o}\ .$$

In order to avoid the additional complexity of the choice of $Spin^{c}$ extensions of connections on real vector bundles with $Spin^{c}$-structures in the present paper we will work with $Spin$-structures.
If $T^{v}\pi$ has a spin structure, then it has an induced $Spin^{c}$-structure, and a connection
$ \nabla^{T^{v}\pi}$ has a canonical $Spin^{c}$-extension which we take from now on.
If the $\widehat{KU}$-orientation $\hat o$ is defined using the vertical metric and the horizontal distribution as above, then we have \begin{equation}\label{fwefw4343433fewfwf}
\hA(\hat o)=\hA(\nabla^{T^{v}\pi})\ ,\end{equation}
where $\nabla^{T^{v}\pi}$ is the Levi-Civita connection.

\bigskip

Assume that $\pi:W\to B$ is a submersion with fibrewise boundary $\partial \pi:\partial W\to B$.
If $\hat o$ is a $\widehat{KU}$-orientation of $\pi$, then we can define an induced $\widehat{KU}$-orientation $\partial \hat o$ of $\partial \pi$. In this situation we have the bordism formula \cite[Prop. 5.18]{MR2664467}. If $\hat x\in \widehat{KU}^{p}(W)$, then \begin{equation}\label{jkhdkqjwdhqwdwqdqwdqwdqwd789}
\partial\pi^{\partial \hat o}_{!}(\hat x_{|\partial W})=a(\int_{W/B} \hA(\hat o)\wedge R(\hat x ))\ .\end{equation}

\section{The invariant}\label{flkwjefwkejfeflkewjflkefjelkfjelfjewlfewf9790}

Given a closed odd-dimensional real foliated spin manifold $(M,\cF)$ such that $$2\mathrm{codim}(\cF)< \dim(M)$$ with a stable real framing $s$ of $\cF$ and a complex vector bundle $(V,\nabla^{I})$ with flat partial connection we define an invariant $$\rho(M,\cF,\nabla^{I},s)\in \C/\Z\ .$$

\bigskip

In order to define the invariant we first choose the following additional geometric data:
\begin{enumerate}
\item We choose a connection $\nabla$ on $V$ which extends $\nabla^{I}$, see Definition \ref{fjwelfwfewf42343242wf}.
\item We choose an extension $\nabla^{\cF_{\R}^{\perp}}$ of the flat partial connection $\nabla^{I,\cF^{\perp}_{\R}}$, see Example \ref{kjlwefwefwef}.

\item We choose a Riemannian metric $g^{TM}$.
\end{enumerate}

For $n\in \nat$ we let $\underline{\R^{n}}$ denote the trivial $n$-dimensional real vector bundle on $M$.
A stable real framing $s$ of $\cF$ is an isomorphism of real vector bundles
$$s:\cF_{\R}\oplus \underline{\R^{n}}\cong \underline{\R^{m}}$$
for certain choices of $n,m\in \nat$. The framing and the trivial connection on $\underline{\R^{m}}$ induce a connection $\nabla^{s}$ on $\cF_{\R}\oplus \underline{\R^{n}}$.
The Riemannian metric on $M$ further induces an orthogonal splitting $TM\cong \cF_{\R}\oplus \cF_{\R}^{\perp}$ so that we can consider both connections
$\nabla^{LC}\oplus \nabla^{\underline{\R^{n}}}$ and $\nabla^{s}\oplus \nabla^{\cF^{\perp}_{\R}}$ on the same bundle $$TM\oplus \underline{\R^{n}}\cong (\cF_{\R}\oplus \underline{\R^{n}})\oplus \cF_{\R}^{\perp}\ .$$ In particular, we can define the transgression form (see \eqref{nkdqlwdwqddwqdj09809})
$$\widetilde{\hA}(LC,s):=\widetilde{\hA}(\nabla^{LC}\oplus \nabla^{\underline{\R^{n}}},\nabla^{s}\oplus \nabla^{\cF^{\perp}_{\R}})\in DD^{per}(M)^{-1}/\im(d)\ ,$$
where $\nabla^{LC}$ is the Levi-Civita connection on $TM$ associated to the Riemannian metric $g^{TM}$.

\bigskip

We now consider the map $\pi:M\to *$. Since $M$ is closed this is a proper submersion.
Since $M$ is spin, this map has a $KU$-orientation $o$. The choice of a Riemannian metric refines the orientation $o$ to a $\widehat{KU}$-orientation $\hat o$ (note that the horizontal bundle is the zero bundle), see Subsection \ref{wlekfjwelfjewlfwef123}.

\begin{ddd}\label{flwefjwefewff} Let
$M$ be an odd-dimensional real foliated closed spin manifold, $s$ be a stable real framing of $\cF$, and $\nabla^{I}$ be a flat partial connection on a complex vector bundle on $M$. Assume further that we have fixed $g^{TM}$, $\nabla^{\cF_{\R}^{\perp}}$, and $\nabla$.
Then we define
$$\rho(M,\cF,\nabla^{I},s):=\pi_{!}^{\hat o-\widetilde{\hA}(LC,s)}([V,\nabla])\in \widehat{KU}^{-\dim(M)}(*)\stackrel{\eqref{djhqkjwdqwdq9879}}{\cong} \C/\Z\ .$$
\end{ddd}

In general, this quantity depends on the additional choices $g^{TM}$, $\nabla^{\cF_{\R}^{\perp}}$, and $\nabla$. It will be a consequence of the bordism invariance, that $\rho(M,\cF,\nabla^{I},s)$ is actually independent of these choices provided $2\mathrm{codim}(\cF)<\dim(M)$.

\begin{prop} \label{fklwfjeklfwefewfwf897}
Assume that $(M,\cF)$, $\nabla^{I}$, $s$ as well as $g^{TM}$, $\nabla^{\cF_{\R}^{\perp}}$, and $\nabla$ are as in Definition \ref{flwefjwefewff} with the exception that $M$ is even-dimensional and has a boundary $\partial M$ which is transversal to $\cF$. We further assume that the geometric structures have a product structure near $\partial M$.
Then we have
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M}\hA^{-}(\nabla^{\cF_{\R}^{\perp}})\wedge \ch^{-}(\nabla)]_{\C/\Z}\ .$$
In particular, if $2\mathrm{codim}(\cF)<\dim(M)$, then $\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=0$.
\end{prop}
\proof By \eqref{jkhdkqjwdhqwdwqdqwdqwdqwd789} we have
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M} \hA(\hat o-\widetilde{\hA}(LC,s))\wedge R([V,\nabla])]_{\C/\Z}\ .$$
Using \eqref{lwefwwfewfewfefewfewfewffwe}, \eqref{fwefw4343433fewfwf} and \eqref{dkdqwdwqdqwdwqdkl8997}
we get
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M} (\hA(\nabla^{LC})-d\widetilde{\hA}(LC,s))\wedge \ch(\nabla) ]_{\C/\Z}\ .$$
We apply \eqref{1e1h2ekj12ej12ke21hek2eh} and the fact that $\hA$ is multiplicative in order to rewrite this as
$$\rho(\partial M,\cF_{|\partial M},\nabla^{I}_{ |\partial M},s_{|\partial M})=[\int_{M} \hA(\nabla^{\cF_{\R}^{\perp}})\wedge \ch(\nabla)]_{\C/\Z}\ .$$
We now use that both, $\nabla^{\cF_{\R}^{\perp}}$ and $\nabla$ extend flat partial connections. The associated characteristic forms therefore refine to cycles in $DD^{-}(M)$.
Hence by Example \ref{kjlwefwefwef} and Definition \ref{qldjqwldqwdqwdqwd} we have
$$ \int_{M}\hA(\nabla^{\cF_{\R}^{\perp}})\wedge \ch(\nabla)=\int_{M}\hA^{-}(\nabla^{\cF_{\R}^{\perp}})\wedge \ch^{-}(\nabla) \ .$$
This implies the first assertion.

\bigskip

The integral $\int_{M}$ factorizes over the component in $$DD^{-}(M)(p)^{0}=F^{p}\Omega^{2p}(M)$$ with $p=\dim(M)/2$.
If $\mathrm{codim}(\cF)<p$ we have $F^{p}\Omega^{2p}(M,\cF)=0$ and hence \begin{equation}\label{rferfefref32234324344}
Z^{0}(DD^{-}(M)(p))=0\ .
\end{equation}
This implies the second claim.
\hB

In the following we define the opposite of a framing and a spin structure.
Let $(M,\cF)$ and the stable real framing $s$ of $\cF$ be given. Then we can form the cylinder
$I\times M$ with the foliation $T_{\C}I\boxplus \cF$, see Example \ref{dekldlqwdqwdqwd}. We trivialize $TI\cong I\times \underline{\R}$ using the section $\partial_{t}$, where $t$ is the standard coordinate of the cylinder. Then we write
$T(I\times M)\cong TI\boxplus TM\cong \underline{\R}\boxplus TM$ in order to define the induced spin structure on $I\times M$. Furthermore, the identification $T_{\C}I\boxplus \cF\cong \underline{\C}\boxplus \cF$ provides the stable real framing $I\times s$ of $T_{\C}I\boxplus \cF$. These constructions are made such that
$(M,\cF,s)$ is the boundary of $(I\times M,T_{\C}I\boxplus \cF,I\times s)$ at the upper face of the cylinder corresponding to $1\in I$. \begin{ddd} We define $(M^{op},\cF,s^{op})$ to be the boundary of the cylinder at $0\in I$. \end{ddd} Here $M^{op}$ indicates that $M$ is equipped with the opposite spin structure.

\bigskip

We adopt all the assumptions made in Definition \ref{flwefjwefewff} and fix choices for $\nabla$, $\nabla^{\cF_{\R}^{\perp}}$ and $g^{TM}$. These can be extended constantly over the cylinder. In this case $\hA^{-}(\nabla^{I\times \cF_{\R}^{\perp}})\wedge \ch^{-}(\pr^{*}\nabla)$
is pulled back from $M$ and has no $dt$-component. Hence its integral over $I\times M$ vanishes.
\begin{kor}
$$\rho(M,\cF,\nabla^{I},s)=-\rho(M^{op},\cF,\nabla^{I},s^{op})\ .$$
\end{kor}

Assume now that we have two choices for $\nabla$, $\nabla^{\cF_{\R}^{\perp}}$ and $g^{TM}$. Then we can again consider the cylinder over $M$ and interpolate between these choices. The second assertion of Proposition \ref{fklwfjeklfwefewfwf897} and the vanishing \eqref{rferfefref32234324344} for $p=\frac{\dim(M)+1}{2}$ imply:
\begin{kor}\label{djwqdlqwjdkwqdwqdqd} If $2\mathrm{codim}(\cF)< \dim(M)$, then
$\rho(M,\cF,\nabla^{I},s)$ is independent of the choices of $\nabla$, $\nabla^{\cF_{\R}^{\perp}}$ and $g^{TM}$.
\end{kor}

\section{A spectral geometric interpretation}\label{fkweflwefewfewfewfwfwfw}

In this section we express $\rho(M,\cF,\nabla^{I},s)$ in terms of spectral invariants of Dirac operators.

\bigskip

Let $M$ be a closed spin manifold with a Riemannian metric $g^{TM}$ and $\bV:=(V,h^{V},\nabla^{u})$ be a hermitean vector bundle with a unitary connection. Then we can form the twisted Dirac operator $\slashed{D}\otimes \bV$. It is a first order elliptic differential operator which acts on the space of sections $\Gamma(M,S(TM)\otimes V)$, where $S(TM)$ is the spinor bundle of $M$. It is symmetric with respect to the natural $L^{2}$-metric. Its spectrum is real and consists of eigenvalues of finite multiplicity accumulating at $\pm\infty$. By Weyl's asymptotics the number of eigenvalues with absolute value $\le R$ (counted with multiplicity) grows as $R^{\dim(M)}$.
The $\eta$-function of this operator was introduced by Atiyah-Patodi-Singer \cite{MR0397797} and is defined by
$$\eta(\slashed{D}\otimes \bV)(s)=\sum_{\lambda\not=0} m_{\lambda} \sign(\lambda)|\lambda|^{-s}\ ,$$
where the sum is taken over the non-zero eigenvalues of $\slashed{D}\otimes \bV$ and $m_{\lambda}$ denotes the multiplicity. The sum converges for $\Ree(s)>\dim(M)$. It has been further shown by \cite{MR0397797} that the $\eta$-function has a meromorphic continuation to all of $\C$ which is regular at $s=0$.

\begin{ddd} The $\eta$-invariant of $\slashed{D}\otimes \bV$ is defined by $\eta(\slashed{D}\otimes \bV):=\eta(\slashed{D}\otimes \bV)(0)$. We further define the reduced $\eta$-invariant
$$\xi(\slashed{D}\otimes\bV):=[\frac{\eta(\slashed{D}\otimes \bV)+\dim(\ker(\slashed{D}\otimes \bV))}{2}]_{\C/\Z}\in \C/\Z\ .$$
\end{ddd}

We consider the projection $\pi:M\to *$. The spin structure on $M$ and the Riemannian metric $g^{TM}$ induce a $\widehat{KU}$-orientation $\hat o$, see Subsection \ref{wlekfjwelfjewlfwef123}. The geometric bundle
$\bV$ defines a class $[V,\nabla^{u}]\in \widehat{KU}^{0}(M)$. Using the identification \eqref{djhqkjwdqwdq9879} we get by \cite[Cor. 5.5]{MR2664467}

\begin{prop}\label{kqwdkjqdwqdwd}
$$\pi_{!}^{\hat o}([V,\nabla^{u}])=\xi(\slashed{D}\otimes\bV)\ .$$
\end{prop}

We now adopt the assumptions of Definition \ref{flwefjwefewff}.
We further choose a hermitean metric $h^{V}$ and a unitary connection $\nabla^{u}$ and set $\bV:=(V,h^{V},\nabla^{u})$.

\begin{prop}\label{fjwfkljfklfjwelkfjkewjfewlkf9798237982749237432243}
We have
$$\rho(M,\cF,\nabla^{I},s)=\xi(\slashed{D}\otimes\bV)-[\int_{M} \widetilde{\hA}(LC,s)\wedge \ch(\nabla)-
\int_{M}\hA(\nabla^{LC})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z}\ .$$
\end{prop}
\proof We use the rules \eqref{kdklqwjdlqwjdwldqwdqwdqd} for $x=[V,\nabla]$, $\alpha=-\widetilde{\hA}(LC,s)$ and $\omega=\widetilde{\ch}(\nabla,\nabla^{u})$ and equations \eqref{dkdqwdwqdqwdwqdkl8997}, \eqref{fwefw4343433fewfwf} and \eqref{kdqklwdjqwlkdjqlwdkwqd89798}. \hB

\section{Special cases}\label{jbjkhwekfewfwefewf89798}

\subsection{Adams $e$-invariant}\label{kdljlqwdqwdqwd}

We consider the case of the maximal foliation $\cF_{max}=T_{\C}M$ on a closed manifold $M$, see Example \ref{lkjwqdwdqwdqwdqwd}. Then a stable framing $s$ of $\cF_{max,\R}$ is a stable framing of $TM$ and
$(M,s)$ is a cycle for the framed bordism class $[M,s]\in \Omega^{fr}_{\dim(M)}$. The Pontrjagin-Thom construction identifies the framed bordism theory $\Omega^{fr}_{*}$ with the homology theory represented by the sphere spectrum. In particular, its coefficients are the stable homotopy groups of the sphere $\Omega^{fr}_{*}(*)\cong \pi_{*}^{s}$. In his study of the j-homomorphism Adams defined in \cite{MR0198470} a homomorphism
$$e^{Adams}_{\C}: \pi^{s}_{k}\to \C/\Z$$
for odd $k\in \nat$. A spectral geometric interpretation of $e^{Adams}_{\C}$ has been given by Atiyah-Patodi-Singer in
\cite{MR0397797}. In the following we describe $e^{Adams}_{\C}$ using differential $KU$-theory.

The stable framing $s$ induces a spin structure on $M$. Given a Riemannian metric
$g^{TM}$ we obtain a $\widehat{KU}$-orientation $\hat o$ of $\pi:M\to *$. It has been observed in \cite[Prop. 5.22]{MR2664467} that the $\widehat{KU}$-orientation
$$\hat o_{s}:=\hat o-\widetilde{\hA}(LC,s)$$ of $\pi$
does not depend on the choice of the Riemannian metric.

Let $\beins\in \widehat{KU}(M)$ be the class of the trivial one-dimensional bundle $(\underline{\C},\nabla^{triv})$.
Then by \cite[Lemma 5.24]{MR2664467} we have using \eqref{djhqkjwdqwdq9879}

\begin{equation}\label{rewrewrrwr324324234234}
e^{Adams}_{\C}([M,s])= \pi_{!}^{\hat o_{s}}(\beins)\ .
\end{equation}
The following Corollary immediately follows from the Definition \ref{flwefjwefewff}.
\begin{kor}
We have
$$\rho(M,\cF_{max},\nabla^{triv},s)= e^{Adams}_{\C}([M,s])\ .$$
\end{kor}

Using the first identity in \eqref{kdklqwjdlqwjdwldqwdqwdqd} we get the expression (to be used later) \begin{equation}\label{kljklqwdwqd}e^{Adams}_{\C}([M,s])= \pi_{!}^{\hat o}(\beins) -[\int_{M} \widetilde{\hA}(LC,s)]_{\C/\Z} \end{equation} for the $e$-invariant.

\subsection{The $\rho$-invariant of flat bundles}

Assume now that $(V,\nabla)$ is a flat bundle on a closed odd-dimensional spin manifold $M$. The spin structure on $M$ equips the map $\pi:M\to *$ with an orientation $o$ for $\mathbf{KU}$. We observe that $$[V,\nabla]-\dim(V)\beins\in \widehat{KU}^{0}_{flat}(M)\cong \mathbf{KU}\C/\Z^{-1}(M)\ .$$
Hence we can apply the integration map \eqref{hjkfhjkhkjhkjefwefefwe7987} to this difference.
\begin{ddd}
We define the $\rho$-invariant of $\nabla$ by
$$\rho(\nabla):=\pi_{!}^{o}([V,\nabla]-\dim(V)\beins)\in \C/\Z \ .$$
\end{ddd}

If we choose a Riemannian metric, then we get a refinement of $o$ to a $\widehat{KU}$-orientation $\hat o$ of $\pi$. Using the integration in differential $K$-theory and \eqref{r23r23r23r23r23r235435346546} we can write
\begin{equation}\label{jhjkhjkehfkewhfkfhkewjhfkewjfewfwf}
\rho(\nabla)=\pi_{!}^{\hat o}([V,\nabla])-\dim(V)\pi_{!}^{\hat o}(\beins) \ .
\end{equation}

\begin{rem}{\rm Assume that $h^{V}$ is a hermitean metric on $V$ preserved by $\nabla$. Then we can form the geometric bundle $\bV=(V,\nabla,h^{V})$. As a consequence of Proposition \ref{kqwdkjqdwqdwd} we have \begin{equation}\label{jhdjkehdkwwedewd}
\pi_{!}^{\hat o}([V,\nabla])-\dim(V)\pi_{!}^{\hat o}(\beins)=\xi(\slashed{D}\otimes \bV)-\dim(V)\xi(\slashed{D})\ .
\end{equation}
Combining this with \eqref{jhjkhjkehfkewhfkfhkewjhfkewjfewfwf} we get the statement of the index theorem for flat vector bundles by Atiyah-Patodi-Singer \cite[Thm. 5.3]{MR0397799}:
$$ \xi(\slashed{D}\otimes \bV)-\dim(V)\xi(\slashed{D})=\rho(\nabla)\ .$$
Observe that this is really a non-trivial statement. The left-hand side of this equality is the analytic index of the flat bundle, and the right-hand side is the topological index since we have defined the $\rho$-invariant using the topological integration in $\mathbf{KU}\C/\Z$-theory.
}
\end{rem}

Let us now assume that the spin structure on $M$ is induced by a stable framing $s$ of $TM$.
\begin{lem}
We have
\begin{equation}\label{mkmxlkmlqwxqwx}\rho(M,\cF_{max},\nabla,s)=\rho(\nabla)+\dim(V) e^{Adams}_{\C}([M,s])\ .\end{equation}
\end{lem}
\proof Since $\nabla$ is flat we have
\begin{equation}\label{dqjwqwddqwdqwdwqd}\pi_{!}^{\hat o-\widetilde{\hA}(LC,s)}([V,\nabla])=\pi_{!}^{\hat o}([V,\nabla])-\dim(V)[\int_{M} \widetilde{\hA}(LC,s)]_{\C/\Z}\ .\end{equation}
We first use \eqref{kljklqwdwqd}
in order to replace the second term in \eqref{dqjwqwddqwdqwdwqd} and then apply \eqref{jhjkhjkehfkewhfkfhkewjhfkewjfewfwf}. \hB

\begin{rem}{\rm The decomposition \eqref{mkmxlkmlqwxqwx} of the invariant $\rho(M,\cF_{max},\nabla,s)$ is very interesting.
A priori this quantity depends on the isomorphism class of the flat bundle $(V,\nabla)$.
But we now observe that $\rho(M,\cF_{max},\nabla,s)$ is actually an invariant of the class $[V,\nabla]^{alg}\in {\mathbf{K}}(\C)^{0}(M)$ represented by $(V,\nabla)$.
This fact has already been shown in \cite{westburyjones} as we will explain in the following.

Fix a base point $m\in M$, choose an identification $V_{m}\cong \C^{\dim(V)}$, and let $\alpha:\pi_{1}(M,m)\to GL(\dim(V),\C)$ denote the holonomy representation associated to the flat connection $\nabla$ on $V$.
Then the quantity $e(M,\alpha)\in \C/\Z$ introduced by \cite{westburyjones} (for $M$ a homology sphere) can be written in the form (compare \cite[Thm. A]{westburyjones})
$$e(M,\alpha)=\rho(\nabla)\ .$$
The number $e(M,\alpha)\in \C/\Z$ only depends on the algebraic $K$-theory class of $M$
determined by $\alpha$ which in our notation is $[V,\nabla]^{alg}\in {\mathbf{K}}(\C)^{0}(M)$.
Since clearly $\dim(V)$ is an invariant of $[V,\nabla]^{alg}$ as well, the combination
$$\rho(M,\cF_{max},\nabla,s)=e(M,\alpha)+\dim(V) e^{Adams}_{\C}([M,s])$$
only depends on the class $[V,\nabla]^{alg}$ of $(V,\nabla)$.

In Section \ref{keklwfewfewfewf} we will show a much stronger result. We will see that $ \rho(M,\cF_{max},\nabla,s)$
only depends on the class $$\pi^{o_{s}}_{!}([V,\nabla]^{alg})\in {\mathbf{K}}(\C)^{-\dim(M)}(*)\cong K_{\dim(M)}(\C)\ ,$$ where ${\mathbf{K}}(\C)^{*}$ is the cohomology theory represented by the algebraic $K$-theory spectrum of $\C$, and $o_{s}$ is the orientation of $\pi:M\to *$ given by the framing $s$ for stable cohomotopy (and hence for every cohomology theory since it is a module theory over stable cohomotopy).

The formula \eqref{mkmxlkmlqwxqwx} can be compared with the formulas in \cite[Thm. 5.5]{2011arXiv1103.4217B}. We conclude that $\rho(M,\cF_{max},\nabla,s)$ can be expressed in terms of the universal $\eta$-invariant introduced in that reference. }
\end{rem}

\subsection{A families $e$-invariant}

Let $$q:W\to B$$ be a proper submersion of relative dimension $p:=\dim(W)-\dim(B)>0$ and consider the vertical foliation $\cF^{v}=T^{v}_{\C}q$ on $W$, see Example \ref{djqlwdqwdqwdq}. This foliation is real. A framing $s$ of $\cF_{\R}^{v}$ induces an orientation $o_{q}$ of the map $q$ for the framed bordism cohomology theory $\Omega^{fr,*}$. We get a class $$[W\stackrel{q}{\to} B,s]=q^{o_{q}}_{!}(1_{S})\in \Omega^{fr,-p}(B)\ ,$$ where $1_{S}\in \Omega^{fr,0}(W)$ is the unit. The construction \eqref{rewrewrrwr324324234234} of Adams' $e$-invariant can be extended from $B=*$ to general $B$
as a map
$$e^{Adams}_{\C}:\Omega^{fr,-p}(B)\to \mathbf{KU}\C/\Z^{-p-1}(B)\ .$$
According to \cite[Definition 5.23]{MR2664467} its value on the class $[W\stackrel{q}{\to} B,s]$ is given by
\begin{equation}\label{ewfwfewfwfwf23434}
e^{Adams}_{\C}([W\stackrel{q}{\to} B,s]):=q_{!}^{\hat o_{s}}(\beins)\in \widehat{KU}^{-p}(B)_{flat}\cong \mathbf{KU}\C/\Z^{-p-1}(B)\ ,
\end{equation}
where $\hat o_{s}$ is the $\widehat{KU}$-orientation of $q$ induced by the framing, see Remark \ref{jdlqwkdjlqwdwqd}.

\begin{rem}\label{jdlqwkdjlqwdwqd}{\rm The construction of $\hat o_{s}$ is similar to the one in Subsection \ref{kdljlqwdqwdqwd}.
The vertical framing induces a spin structure. We choose a fibrewise Riemannian metric and a horizontal distribution.
Then we get a vertical Levi-Civita connection $\nabla^{T^{v}q}$. As explained in Subsection \ref{wlekfjwelfjewlfwef123} we get a $\widehat{KU}$-orientation $\hat o$. Furthermore, using the trivial connection induced by the framing, we can define the transgression
$$\widetilde{\hA}(\nabla^{T^{v}q},s)\in DD^{per}(W)^{-1}/\im(d)\ .$$
The $\widehat{KU}$-orientation
$$\hat o_{s}:=\hat o-\widetilde{\hA}(\nabla^{T^{v}q},s)$$ is then independent of the choice of the geometric structures.

In order to see that $e^{Adams}_{\C}([W\stackrel{q}{\to} B,s])$ is flat we calculate its curvature using \eqref{r23r23r23r23r23r235435346546} and \eqref{lwefwwfewfewfefewfewfewffwe}
$$R(e^{Adams}_{\C}([W\stackrel{q}{\to} B,s]))=\int_{W/B}(\hA(\hat o_{q})-d \widetilde{\hA}(\nabla^{T^{v}q},s))=0\ .$$

}\end{rem}

Let us now assume that $B$ is closed and has a spin structure. Then the projection $\pi:B\to *$ has a $KU$ orientation $o_{\pi}$.
We choose a Riemannian metric $g^{TB}$ on $B$, a vertical metric $g^{T^{v}q}$, and a horizontal distribution $T^{h}q$. The metric $g^{TB}$ lifts to a metric on the horizontal bundle $T^{h}q$ and induces, together with the vertical metric $g^{T^{v}q}$, a metric on $W$. Furthermore, the spin structure of $B$ induces a spin structure on the horizontal bundle, which together with the framing of $T^{v}q$ provides a spin structure on $W$. Note that $\cF^{\perp}_{\R}\cong T^{h}q$. The Levi-Civita connection of $g^{TB}$ pulls back to the connection $\nabla^{\cF^{\perp}_{\R}}$.

\bigskip

We consider a geometric vector bundle $(V,\nabla)$ on $B$.
Then $(\pi^{*}V,\pi^{*}\nabla)$ is a bundle on $W$ and the restriction of $\pi^{*}\nabla$ to $\cF$ is flat, see Example \ref{fjewflewjflewfewf}.

\bigskip

We now assume that $\dim(W)$ is odd.
\begin{lem}
We have
$$\rho(W,\cF^{v},\pi^{*}\nabla,s)=\pi^{o_{\pi}}_{!}(e^{Adams}_{\C}([W\stackrel{q}{\to}B,s])\cup [V])\ .$$
\end{lem}
\proof The geometry on $B$ provides a $\widehat{KU}$-orientation $\hat o_{\pi}$. The geometry on $W$ induces a $\widehat{KU}$-orientation $\hat o_{\pi\circ q}$. In the following calculation we use \cite[Definition 3.22]{MR2664467} and $\hA(\hat o_{\pi})=\hA(\nabla^{\cF_{\R}^{\perp}})$ at the place marked by $!$.
\begin{eqnarray*} \hat o_{\pi}\circ \hat o_{s}&=&\hat o_{\pi}\circ (\hat o-\widetilde{\hA}(\nabla^{T^{v}q},s))\\&=&\hat o_{\pi}\circ \hat o-\hA(\hat o_{\pi})\wedge \widetilde{\hA}(\nabla^{T^{v}q},s)\\&\stackrel{!}{=}&\hat o_{\pi\circ q}-\hA(\nabla^{\cF_{\R}^{\perp}})\wedge \widetilde{\hA}(\nabla^{T^{v}q},s)-\widetilde{\hA}(\nabla^{LC} , \nabla^{T^{v}q}\oplus \nabla^{\cF_{\R}^{\perp}} )\\&=&\hat o_{\pi\circ q}-\widetilde{\hA}(\nabla^{T^{v}q}\oplus \nabla^{\cF_{\R}^{\perp}}, \nabla^{triv,s}\oplus \nabla^{\cF^{\perp}_{\R}})-
\widetilde{\hA}(\nabla^{LC} , \nabla^{T^{v}q}\oplus \nabla^{\cF_{\R}^{\perp}} ) \\&=&\hat o_{\pi\circ q}-
\widetilde{\hA}(LC,s)\ .\end{eqnarray*}
We now use that integration is compatible with the identification
$KU\C/\Z^{*-1}\cong \widehat{KU}^{*}_{flat}$.
We get
\begin{eqnarray*}
\pi^{o_{\pi}}_{!}(e^{Adams}_{\C}([W\to B,s])\cup [V])&=&
\pi^{\hat o_{\pi}}_{!}(e^{Adams}_{\C}([W\to B,s])\cup [V,\nabla])
\\&\stackrel{\eqref{ewfwfewfwfwf23434}}{=}&
\pi^{\hat o_{\pi}}_{!}(q_{!}^{\hat o_{s}}(\pi^{*}[V,\nabla]))\\&\stackrel{\eqref{e23e23e3e23e32e32e2}}{=}&(\pi\circ q)_{!}^{\hat o_{\pi}\circ \hat o_{s}}(\pi^{*}[V,\nabla])\\
&=&(\pi\circ q)_{!}^{\hat o_{\pi\circ q}-
\widetilde{\hA}(LC,s)}(\pi^{*}[V,\nabla])\\
&=&\rho(W,\cF^{v},\pi^{*}\nabla,s)
\end{eqnarray*} \hB

\subsection{The dependence on the framing}

Let $s,s^{\prime}$ be two stable framings of a foliation $\cF_{\R}$. Then we get two connections
$\nabla^{s}$ and $\nabla^{s^{\prime}}$ on $\cF_{\R}\oplus \R^{n}$. Since these connections are flat, by \eqref{1e1h2ekj12ej12ke21hek2eh} we get a cohomology class
$$\widetilde{\hA}(\nabla^{s^{\prime}},\nabla^{s})\in H^{-1}(DD^{per}(M))\ .$$

\begin{ddd}
For every class $u\in KU^{0}(M)$ we define the relative $e$-invariant of the pair
$(s^{\prime},s)$ of stable framings of $\cF_{\R}$ by $$e_u(s^{\prime},s)=[\int_{M}\widetilde{\hA}(\nabla^{s^{\prime}},\nabla^{s})\cup \ch(u)]\in \C/\Z\ .$$
\end{ddd}

\begin{rem}{\rm If $\cF=\cF_{max}$, then
$$e_{1}(s^{\prime},s)=e^{Adams}_{\C}([M,s^{\prime}])-e^{Adams}_{\C}([M,s])\ .$$
In this case $e_{1}(s^{\prime},s)$ takes values in the well-known finite subgroup $\im(e^{Adams}_{\C})\subseteq \C/\Z$ calculated by Adams.
}
\end{rem}

The proof of the following proposition is a straightforward calculation.
\begin{prop}
We adopt the assumptions of Definition \ref{flwefjwefewff} and assume that $s,s^{\prime}$ are stable framings of $\cF_{\R}$.
Then we have
$$\rho(M,\cF,\nabla^{I},s^{\prime})-\rho(M,\cF,\nabla^{I},s)=e_{[V]}(s^{\prime},s)\ .$$
\end{prop}

\subsection{Real and imaginary parts}

\subsubsection{The decomposition}
In this subsection we discuss the components $\rho(\dots)^{\R/\Z}$ and $\rho(\dots)^{i\R}$ of
$\rho(M,\cF,\nabla^{I},s)$ associated to the decomposition of the target group $$\C/\Z\cong \R/\Z\oplus i\R\ , \quad x=x^{\R/\Z}+x^{i\R}$$ into the real and the imaginary parts.

\bigskip

We adopt the assumptions made in Definition \ref{flwefjwefewff}. In addition we choose a hermitean metric $h^{V}$ on the complex vector bundle $V$. Then we can define the adjoint connection $\nabla^{*}$ of $\nabla$ (see Remark \ref{dkjqwlkdjqwlkjdlwqkdwdwqdqwd}) and its unitarization $$\nabla^{u}:=\frac{1}{2}(\nabla+\nabla^{*})$$ with respect to $h^{V}$.

\bigskip

We use \eqref{kdqklwdjqwlkdjqlwdkwqd89798} in order to write \begin{equation}\label{gdhqgdhjgwqdjhgqwd7987}
[V,\nabla]=[V,\nabla^{u}]+a(\widetilde{\ch}(\nabla,\nabla^{u}))\ .
\end{equation}
Then we calculate
\begin{eqnarray}\lefteqn{\rho(M,\cF,\nabla^{I},s)}&&\nonumber\\&\stackrel{\eqref{kdklqwjdlqwjdwldqwdqwdqd}}{=}&\pi^{\hat o}_{!}([V,\nabla])-[\int_{M} \widetilde{\hA}(LC,s)\wedge \ch(\nabla)]_{\C/\Z}\nonumber\\&\stackrel{\eqref{gdhqgdhjgwqdjhgqwd7987}}{=}&\pi^{\hat o}_{!}([V,\nabla^{u}])+[\int_{M} \hA(\nabla^{LC})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z} - [\int_{M} \widetilde{\hA}(LC,s)\wedge \ch(\nabla)]_{\C/\Z}\nonumber\\
&\stackrel{\eqref{hfjwkjehfkjwehfewfewf897987}}{=}&\pi^{\hat o}_{!}([V,\nabla^{u}])+[\int_{M} \hA(\nabla^{LC})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z}\nonumber\\&& - [ \int_{M} \widetilde{\hA}(LC,s)\wedge (\ch(\nabla^{u})+d\widetilde{\ch}(\nabla,\nabla^{u})) ]_{\C/\Z}\nonumber\\&=&
\pi^{\hat o}_{!}([V,\nabla^{u}])-[\int_{M} \widetilde{\hA}(LC,s)\wedge \ch(\nabla^{u}) ]_{\C/\Z} +[\int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \widetilde{\ch}(\nabla,\nabla^{u})]_{\C/\Z} \label{dlkqwdqwdqwdqwdwqdqw}\end{eqnarray}
using partial integration and Stokes' theorem in the last step.

The first two summands in \eqref{dlkqwdqwdqwdqwdwqdqw} are real. The following is the decomposition of the transgression Chern form into the real and imaginary part (we use \eqref{fewfwefwefwefew32434234234} and \eqref{qwdqwdqwdwqdqwdwqdqwdqwd}):
$$ \widetilde{\ch}(\nabla,\nabla^{u})=\frac{ \widetilde{\ch}(\nabla,\nabla^{u})+ \widetilde{\ch}(\nabla^{*},\nabla^{u})}{2}+\frac{\widetilde{\ch}(\nabla,\nabla^{*})}{2}\ .$$
We get
\begin{eqnarray}
\rho(M,\cF,\nabla^{I},s)^{\R/\Z}&=&\pi^{\hat o}_{!}([V,\nabla^{u}])-[\int_{M} \widetilde{\hA}(LC,s)\wedge \ch(\nabla^{u}) ]_{\R/\Z} \\&&+[\int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \frac{ \widetilde{\ch}(\nabla,\nabla^{u})+ \widetilde{\ch}(\nabla^{*},\nabla^{u})}{2}]_{\R/\Z}\nonumber\\[1cm]
\rho(M,\cF,\nabla^{I},s)^{i\R} &=& \int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \frac{\widetilde{\ch}(\nabla,\nabla^{*})}{2} \label{fjilfewfwefffef}
\ .\end{eqnarray}

\subsubsection{The imaginary part}

We see that the imaginary part $\rho(M,\cF,\nabla^{I},s)^{i\R}$ is just a characteristic number which can be calculated as an integral over locally computable quantities. It does not depend on the framing.

\begin{ex}{\rm We assume that $\nabla^{I}$ is unitary with respect to the metric $h$. Then we can take for $\nabla$ the unitary extension constructed in Lemma
\ref{dhqwkdqkwddqwdwqdioipopioipoopi}. With this choice we have $\nabla= \nabla^{u}$.
\begin{kor}
If $\nabla$ is the unitary extension of $\nabla^{I}$, then $$\rho(M,\cF,\nabla^{I},s)^{i\R}=0\ .$$
In particular, if $2\mathrm{codim}(\cF)<\dim(M)$ and $\nabla^{I}$ is unitary, then $\rho(M,\cF,\nabla^{I},s)^{i\R}=0$. \end{kor}\proof The first assertion follows from \eqref{fjilfewfwefffef}, $\nabla=\nabla^{*}$ and the second equality in \eqref{fewfwefwefwefew32434234234}. The second assertion is then a consequence of the first and Corollary \ref{djwqdlqwjdkwqdwqdqd}. \hB
}\end{ex}

\bigskip

\begin{ex}{\rm For example, if $\pi:\tilde M\to M$ is a finite covering of degree $[\tilde M:M]\in \nat$, then we have the identity
\begin{equation}\label{dwddwqqdqwd342423424324dasd}
\rho(\tilde M,\pi^{*}\cF,\pi^{*}\nabla^{I},\pi^{*}s)^{i\R}=[\tilde M:M] \rho(M,\cF,\nabla^{I},s)^{i\R}\ .
\end{equation}
}\end{ex}

\bigskip

Given a foliated manifold $(M,\cF)$ we have an associated bundle $\cF^{\perp}$ with a flat partial connection $\nabla^{I,\cF^{\perp}}$. If we apply $\rho(\dots)^{i\R}$ to $(V,\nabla^{I})=(\cF^{\perp},\nabla^{I,\cF^{\perp}})$ or a bundle obtained from this by some operation of tensor calculus we get an invariant of the foliation $(M,\cF)$.

\begin{ex}\label{fkllwefwefewf}{\rm In this example, for even $n\in \Z$, we consider a $2n+1$-dimensional closed oriented manifold $M$ with a real foliation $\cF$ of codimension $1$. We assume that $\cF_{\R}$ is co-oriented. Furthermore, we assume that $TM$ has a stable framing $s_{M}$ and a Riemannian metric $g^{TM}$.
The co-orientation of $\cF_{\R}$ induces a framing $s^{\perp}$ of $\cF^{\perp}_{\R}$ by the positive normal unit vector field $N$.
There is then a unique stable framing $s$ of $\cF_{\R}$ such that $s\oplus s^{\perp}\sim s_{M}$.

For $(V,\nabla^{I})$ we take $(\cF^{\perp},\nabla^{I,\cF^{\perp}})$. Let $\mathbf{GV}_{2n+1}(\cF)\in H^{2n+1}(M;\R)$ be the Godbillon-Vey class of the foliation.

\begin{lem}
We have
$$\rho(M,\cF,\nabla^{I},s)^{i\R}=\frac{(-1)^{n+1}}{(2\pi i)^{n+1} n!}\int_{M} \mathbf{GV}_{2n+1}(\cF)\ .$$
\end{lem}
\proof Since $\dim(\cF_{\R}^{\perp})=1$ we have $ \hA_{4p}(\nabla^{\cF^{\perp}_{\R}})\in F^{2p}\Omega^{4p}(M,\cF)=0$ for all $p\ge 1$.
Hence \eqref{fjilfewfwefffef} specializes to
$$\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)^{i\R}=\frac{1}{2}\int_{M} \widetilde{\ch}_{2n+2}(\nabla,\nabla^{*})\ .$$
So we must identify $\widetilde{\ch}_{2n+2}(\nabla,\nabla^{*})$ with a multiple of $\mathbf{GV}_{2n+1}(\cF)$.

\bigskip

We first recall the definition of the Godbillon-Vey class $\mathbf{GV}_{2n+1}(\cF)$.
Since $\cF_{\R}$ is co-oriented there exists a real nowhere vanishing one-form $\kappa\in \Omega^{1}(M)$ such that
$\cF_{\R}=\ker(\kappa)$. Integrability of $\cF_{\R}$ translates to the relation $\kappa\wedge d\kappa=0$. We can choose a real $1$-form
$\omega\in \Omega^{1}(M)$ such that $d\kappa=\kappa\wedge \omega$.
Note that $ \omega$ is unique up to multiples of $\kappa$. Then the form $\omega\wedge (d\omega)^{n}\in \Omega^{2n+1}(M)$ is closed and represents the Godbillon-Vey class $\mathbf{GV}_{2n+1}(\cF)$.

\bigskip

Using the unit normal vector field $N\in \Gamma(M,TM)$
we can normalize $\kappa$ such that $ \kappa(N)=1$. Let $\omega$ be as above. We take $\omega$ as a connection one-form for a connection $\nabla$ on $\cF^{\perp}_{\R}$ with respect to the trivialization by $N$.
For a section $X$ of $TM$ we have by definition
$$\nabla_{X}N=\omega(X)N\ .$$
On the other hand, if $X$ is a section of $\cF$, then we have by Cartan's formula
$$\omega(X)=(\kappa\wedge \omega)(N,X)=d\kappa(N,X)= N\kappa(X)-X\kappa(N)-\kappa([N,X])=\kappa([X,N])\ .$$
In view of the description of $\nabla^{I,\cF^{\perp}_{\R}}$ given in Example \ref{fklwefjwefewf} this implies that the connection $\nabla$ extends the flat partial connection $\nabla^{I,\cF^{\perp}_{\R}}$.

\bigskip

We have
$$\frac{(-1)^{n+1}}{(2\pi i)^{n+1} n!}\omega\wedge (d\omega)^{n}= \widetilde{\ch}_{2n+2}(\nabla,\nabla^{triv})\ .$$
Similarly,
$$(-1)^{n+1}\frac{(-1)^{n+1}}{(2\pi i)^{n+1} n!}\omega\wedge (d\omega)^{n}= \widetilde{\ch}_{2n+2}(\nabla^{*},\nabla^{triv})\ .$$
Hence, if $n$ is even, then by taking the difference of these two equations we get
$$\frac{2(-1)^{n+1}}{(2\pi i)^{n+1} n!}\mathbf{GV}_{2n+1}(\cF)= \widetilde{\ch}_{2n+2}(\nabla ,\nabla^{*})\ .$$

\hB
}
\end{ex}

\begin{rem}\label{ergegojerglerogergeg}{\rm As noted above we can take $(V,\nabla^{I}):=(\cF^{\perp},\nabla^{I, \cF^{\perp}})$
in order to define an invariant which only depends on the foliation $\cF$.
In this example assume that $\cF$ is real and that
$\nabla^{\cF^\perp}$ is the complexification of a connection $\nabla^{\cF_{\R}^{\perp}}$ extending $\nabla^{I,\cF^{\perp}_{\R}}.$ We choose in addition a metric $h^{\cF^{\perp}_{\R}}$ in order to define the adjoint $\nabla^{\cF^{\perp},*} $.
In this remark we explain the place of $$\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)^{i\R}= \int_{M} \hA(\nabla^{\cF^{\perp}_{\R}})\wedge \frac{\widetilde{\ch}(\nabla^{\cF^{\perp}},\nabla^{\cF^{\perp},*})}{2}$$ in the classification of foliation invariants defined in terms of secondary characteristic classes of foliations.

\bigskip

We start with the classification of characteristic forms for foliations of codimension $q\in \nat$ \cite{MR0307250}, see also \cite{MR512428}. Let
$q^{\prime}\in \nat$ be the largest odd integer $\le q$.
One defines the commutative graded algebra
$$WO_{q}:=\R[\tilde c_{1},\dots,\tilde c_{q^{\prime}}]\otimes \R[c_{1},\dots,c_{q}]^{\le 2q}\ ,$$
where the degrees of the generators are given by $$|\tilde c_{i}|=2i-1\ , \quad \mbox{$i$ odd, $\quad \quad $ and }\quad \quad \quad |c_{i}|=2i$$
and the superscript $[-]^{\le 2q}$ indicates that we take only polynomials of degree at most $2q$.

On this ring we consider the differential $d$ given by
$$d\tilde c_{i}:=c_{i}\ ,\quad dc_{i}=0\ .$$
The cohomology $H^{*}(WO_{q})$ of this DGA classifies secondary characteristic classes for foliations of codimension $q$.
For a cohomology class $[U]\in H^{*}(WO_{q})$ we let $\Delta([U])\in H^{*}(M;\R)$ denote the corresponding cohomology class.

\bigskip

In the following we describe $\Delta$ on the form level. Since $\nabla^{\cF^{\perp}}$ and $\nabla^{\cF^{\perp},*}$ are complexifications of connections which are dual to each other on a real bundle we have
$$\ch_{2n}(\nabla^{\cF^{\perp},*})=(-1)^{n} \ch_{2n}(\nabla^{\cF^{\perp}})\ .$$
By \eqref{hfjwkjehfkjwehfewfewf897987}
we get for odd $n$
$$d \frac{1}{2i^{n}}\widetilde{\ch}_{2n}(\nabla^{\cF^{\perp}}, \nabla^{\cF^{\perp},*})=\frac{1}{i^{n}}\ch_{2n}(\nabla^{\cF^{\perp}})\ .$$
Therefore the connection $\nabla^{\cF^{\perp}}$ together with a choice of a metric $h^{\cF^{\perp}_{\R}}$ induces a map of commutative differential graded algebras
$$\Delta_{(\nabla^{\cF^{\perp}},h^{\cF^{\perp}_{\R}})}:WO_{q}\to \Omega(M)\ ,$$ by $$\Delta_{( \nabla^{\cF^{\perp}} ,h^{\cF^{\perp}_{\R}})}(\tilde c_{n}):=\frac{1}{2i^{n}}\widetilde{\ch}_{2n}(\nabla^{\cF^{\perp}}, \nabla^{\cF^{\perp},*})\ , \quad \Delta_{( \nabla^{\cF^{\perp}} ,h^{\cF^{\perp}_{\R}})} (c_{n}):=\frac{1}{i^{n}}\ch_{2n}(\nabla^{\cF^{\perp}})\ .$$

Then for $[U]\in H^{*}(WO_{q})$ the characteristic class $\Delta([U])\in H^{*}(M;\R)$ of the foliation $\cF$ is given by
\begin{equation}\label{qwdqwdwqdqwdqwdqwd2312343534tfwrvwfvwfwef}
\Delta([U]):=[\Delta_{( \nabla^{\cF^{\perp}} ,h^{\cF^{\perp}_{\R}})}(U)] \ .
\end{equation}

\bigskip

There is a universal polynomial
$ A(c_{1},\dots,c_{q})\in \R[c_{1},\dots,c_{q}]^{\le 2q}$ such that
$$\hA(\nabla^{\cF^{\perp}_{\R}})^{\le 2q}= A(\ch_{2}(\nabla^{\cF^{\perp}}),\dots,\ch_{2q}(\nabla^{\cF^{\perp}}))\ .$$
We consider \begin{equation}\label{fwefwefewfewfewfwefew53453453455}
U:=\left[\left(\sum_{i=1, odd}^{q^{\prime}} (-1)^{\frac{i+1}{2}} \tilde c_{i}\right) A(c_{1},\dots,c_{q}) \right]_{\dim(M)}\in WO^{\dim(M)}_{q}\ .
\end{equation}

If $2q<\dim(M)$, then $U$ is a cycle. \begin{lem} Let $\cF$ be a real foliation of codimension $q$ such that $2q<\dim(M)$. Then the class $$[U]\in H^{\dim(M)}(WO_{q})$$
is the universal class classifying the imaginary part of $\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)$.
\end{lem}
\proof The relation
$$\rho(M,\cF,\nabla^{I,\cF^{\perp}},s)^{i\R} =i\langle \Delta([U]),[M]\rangle$$ follows immediately from \eqref{fwefwefewfewfewfwefew53453453455}, the definition \eqref{qwdqwdwqdqwdqwdqwd2312343534tfwrvwfvwfwef} of $\Delta([U])$ and \eqref{fjilfewfwefffef}.
\hB

Let us assume that $p$ is odd and $2p-1> q$. Then $d\tilde c_{p}=0$ and we have the cohomology class $[\tilde c_{p}]\in H^{2p-1}(WO_{q})$.
If the foliation $\cF$ is real, then the characteristic class
\eqref{kfkwejwlkefjlwekfjewfewfopipoi234} is given by \begin{equation}\label{ewfwefewfwf432342344123}
[c_{2p-1}(\nabla^{I,\cF^{\perp}})]=2i^{p}\Delta[\tilde c_{p}]\ .
\end{equation}

}
\end{rem}

\subsubsection{The real part}

The real part $\rho(M,\cF,\nabla^{I},s)^{\R/\Z}$ is more complicated and of global nature.
A good case to look at is discussed in Subsection \ref{kdljlqwdqwdqwd}.

\begin{ex}{\rm The following example shows that $\rho(M,\cF,\nabla^{I},s)$ is not an integral over $M$ of locally determined quantities.
We consider the manifold $M:=S^{1}$ with the maximal foliation $\cF_{max}=T_{\C}S^{1}$. The framing $s$ of $TS^{1}$ is the bounding framing so that $[S^{1},s]=0$ in $\Omega_{1}^{fr}$. Furthermore we let $\bV(r):=(V,h,\nabla(r))$
be a flat line bundle with holonomy $\exp(2\pi i r)$ for $r\in [0,1)$. Then we can apply \eqref{mkmxlkmlqwxqwx} and \eqref{jhdjkehdkwwedewd} and get
$$\rho(S^{1},\cF_{max},\nabla(r),s)=\rho(\nabla(r))=\xi(\slashed{D}\otimes \bV(r))-\xi(\slashed{D})\ .$$
In this case the reduced $\eta$-invariant can be calculated explicitly. The result is
$$\xi(\slashed{D}\otimes \bV(r))=[-r]_{\C/\Z}\ .$$
Hence we get
$$\rho(S^{1},\cF_{max},\nabla(r),s) =[-r]_{\C/\Z}\ .$$
In particular, our invariant depends non-trivially on $r$.
The data $(S^{1},\cF_{max},\nabla(r),s)$ for different $r$ are locally isomorphic.

\bigskip

Note that in this example the analog of \eqref{dwddwqqdqwd342423424324dasd} nevertheless holds true.
}
\end{ex}

\section{Factorization over algebraic $K$-theory of smooth functions}\label{keklwfewfewfewf}

Let $P$ be a closed $p$-dimensional manifold and $s$ be a stable framing of $TP$. For a manifold $X$
we consider a product of foliated manifolds \begin{equation}\label{gergergergregegrgerg3453435}
(M,\cF):=(P\times X,T_{\C}P\boxplus 0)=(P,\cF_{max})\times (X,\cF_{min})
\end{equation} and a pair $(V,\nabla^{I})$ of a complex vector bundle and a flat partial connection on $(M,\cF)$. We will show that the data represents an algebraic $K$-theory class $$f^{o_{s}}_{!}(
[V,\nabla^{I}]^{alg})\in K_{p}(C^{\infty}(X))$$ of the ring $C^{\infty}(X)$.
If we assume that $X$ is closed, spin and that $\dim(X)<p$, then our main result is the equality $$\rho(M,\cF,\nabla^{I},s)=\pi_{!}^{o}({\tt reg}_{X}(f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})))\ ,$$
where $${\tt reg}_{X}:K_{p}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-p-1}(X)$$ is the regulator and the map $$\pi:X\to *$$ has the ${\mathbf{ku}}$-orientation from the spin structure of $X$.

\subsection{Statement of the result}\label{}

For manifolds $X$ and $P$ we consider the foliated manifold \eqref{gergergergregegrgerg3453435}.
From the point of view of foliation theory it is very trivial. The leaves of the foliation on the product $M=P\times X$ are just the submanifolds $P\times \{x\}$ for all $x\in X$.
\bigskip

We assume that $P$ is closed and that the tangent bundle $TP$ of $P$ is equipped with a stable framing $s$. The framing $s$ induces an orientation $o_{s}$ of the map $f:P\to *$ for the stable cohomotopy theory, the cohomology theory represented by the sphere spectrum $\bS$ (or equivalently, the framed bordism theory).
Any spectrum $\bE$ is a module spectrum over $\bS$. Consequently $f$ has an induced orientation for the cohomology theory $\bE^{*}$ which we denote by the same symbol $o_{s}$.
We have an Umkehr or integration map between cohomology groups
$$ f_{!}^{o_{s}}:\bE^{*}(P)\to \bE^{*-p}(*)\ ,$$
where $p:=\dim(P)$.
We will apply this to the cohomology theory ${\mathbf{K}}(C^{\infty}(X))^{*}$ represented by the connective algebraic $K$-theory spectrum ${\mathbf{K}}(C^{\infty}(X))$ of the ring of complex-valued smooth functions on the manifold $X$.

\bigskip

We start with the class $$[V,\nabla^{I}]^{alg}\in {\mathbf{K}}(C^{\infty}(X))^{0}(P) $$ (see Definition \ref{xgrergegg} for a technical description) represented by a pair $(V,\nabla^{I})$ of a complex vector bundle on the foliated manifold \eqref{gergergergregegrgerg3453435} and a flat partial connection.
We can form the algebraic $K$-theory class \begin{equation}\label{ghjqwdgjqwdwqdwqdqdqdqd}
f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})\in {\mathbf{K}}(C^{\infty}(X))^{-\dim(P)}(*)=K_{\dim(P)}(C^{\infty}(X))\ .
\end{equation}

We now assume that $X$ is closed and spin. We further assume that $\dim(P)+\dim(X)$ is odd and that $\dim(X)<\dim(P)$, or equivalently, $$2\mathrm{codim}(\cF)<\dim(M)\ .$$ Then by Corollary \ref{djwqdlqwjdkwqdwqdqd} the invariant $\rho(M,\cF,\nabla^{I},s)\in \C/\Z$ is well-defined and independent of additional geometric choices.

The main result of the present section shows that $\rho(M,\cF,\nabla^{I},s)$ can be expressed in terms of the class
\eqref{ghjqwdgjqwdwqdwqdqdqdqd}.
In greater detail, for every $n\in \nat$ with $n> \dim(X)$ we will construct, using methods from differential cohomology theory, a natural regulator
$${\tt reg}_{X}:K_{n}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-n-1}(X)\ ,$$
see Definition \ref{klfwefewfewfwf}.
Let $\pi:X\to *$ be the projection. The spin structure on $X$ induces an orientation $o$ for the periodic complex topological $K$-theory $\mathbf{KU}$, and hence for the $\mathbf{KU}$-modules ${\mathbf{ku}}$ and ${\mathbf{ku}}\C/\Z$.
We use the isomorphisms
$${\mathbf{ku}}^{k}\cong \left\{\begin{array}{cc}\Z&k\in -2\nat\\ 0&else\end{array}\right.\ , \quad {\mathbf{ku}}\C/\Z^{k}\cong \left\{\begin{array}{cc}\C/\Z&k\in -2\nat\\ 0&else\end{array}\right.$$
in order to interpret elements in ${\mathbf{ku}}\C/\Z^{2*}(*)$ (e.g. the left-hand side of \eqref{wefwefefewfwfewfwewefwefwef}) as elements of $\C/\Z$.
\begin{theorem}\label{flkfefwefwefewfef} We have the relation
\begin{equation}\label{wefwefefewfwfewfwewefwefwef}\pi^{o}_{!}({\tt reg}_{X}(f^{o_{s}}_{!}([V,\nabla^{I}]^{alg})))=\rho(M,\cF,\nabla^{I},s)\ .\end{equation}
\end{theorem}
The proof of this Theorem will be finished in Subsection \ref{fwklfwfewfewfewf}.

\begin{rem}{\rm Every class $x\in K_{*}(C^{\infty}(X))$ can be presented in the form \eqref{ghjqwdgjqwdwqdwqdqdqdqd} for suitable stably framed manifolds $P$ and pairs $(V,\nabla^{I})$. Indeed, the class $x$ can be thought of as being represented by a map $x:S^{n}\to BGL(C^{\infty}(X) )^{+}$, where we consider $GL(C^{\infty}(X) )$ as a discrete group and $+$ stands for Quillen's $+$-construction. Using the standard stable framing $s_{can}$ of $S^{n}$ the triple $(S^{n},x,s_{can})$ represents a framed bordism class
$[S^{n},x,s_{can}]\in \Omega^{fr}_{n} (BGL(C^{\infty}(X) )^{+})$. Since the $+$-construction map
$$p:BGL(C^{\infty}(X) )\to BGL(C^{\infty}(X) )^{+}$$ induces an isomorphism in generalized homology theories there exists a unique class
$[P,y,s]\in \Omega^{fr}_{n}(BGL(C^{\infty}(X) ))$ such that $p_{*}([P,y,s])=[S^{n},x,s_{can}]$. Since $P$ is compact, there exists a factorization of $y$ as
$$P\stackrel{\tilde y}{\to} BGL(N,C^{\infty}(X) )\to BGL(C^{\infty}(X) )$$ for a suitable $N\in \nat$. The map $\tilde y$
classifies a pair $(V,\nabla^{I})$ over $P\times X$ of an $N$-dimensional complex vector bundle with a flat partial connection in the $P$-direction. We then have
$$f_{!}^{o_{s}}([V,\nabla^{I}]^{alg})=x\ .$$ \hB
}
\end{rem}

\subsection{Algebraic $K$-theory sheaves}
\label{klfwjlefewfewf}

We consider the site $\Mf_{\C-fol}$ of pairs $(M,\cF)$ of manifolds $M$ with a foliation $\cF$ and foliated maps (see Section \ref{fewl453534535435} for definitions).
The topology is given by open coverings. We have a morphism of sites \begin{equation}\label{ewfwfwefewfewfwfwefwfe}
\Mf_{\C-fol}\to \Mf
\end{equation} which forgets the foliations.

In the following we work in the framework of $\infty$-, or more precisely, of $(\infty,1)$-categories developed by Joyal, Lurie and others \cite{HTT}, \cite{HA}. We refer to \cite[Sec. 2.1]{Bunke:2014aa}, \cite[Sec. 2]{2013arXiv1311.3188B}
and \cite[Sec.4]{2012arXiv1208.3961B} for an introduction to the language as we will use it here and for further references. We will not discuss the size issues. They can be solved in the standard way for the examples used in the present paper.

\bigskip

For a presentable $\infty$-category $\bC$ and a site $\mathbf{M}$ we consider the category
${\mathbf{PSh}}_{\bC}(\mathbf{M})$ of $\bC$-valued presheaves and its full subcategory of sheaves $\Sh_{\bC}(\mathbf{M})$.
They are related by an adjunction \begin{equation}\label{dqdqwdwqdwqdwqdwqdqd}
L:{\mathbf{PSh}}_{\bC}(\mathbf{M})\leftrightarrows\Sh_{\bC}(\mathbf{M}):inclusion\ ,
\end{equation}

where $L$ is called the sheafification.
\bigskip

We consider the $1$-category of categories ${\mathbf{Cat}}$ with its cartesian symmetric monoidal structure.
For the class $W$ of categorical equivalences we form the symmetric monoidal $\infty$-category
${\mathbf{Cat}}[W^{-1}]$. By ${\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])$ we denote the category of commutative algebras in ${\mathbf{Cat}}[W^{-1}]$.
\begin{rem}\label{dkwqnmqlkwdwqdwqdqd}{\rm A commutative monoid can be considered as a symmetric monoidal category with only unit morphisms.
It is an object of ${\mathbf{CAlg}}({\mathbf{Cat}})$ and therefore represents one in ${\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])$. A general symmetric monoidal category has non-identity associator and commutativity constraints and is therefore not a commutative algebra in ${\mathbf{Cat}}$. But it naturally represents an object in ${\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])$.

}
\end{rem}
The objects of ${\mathbf{PSh}}_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])} (\mathbf{M})$ are called symmetric monoidal prestacks. Similarly, objects in $\Sh_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])}(\mathbf{M})$ are called symmetric monoidal stacks.

\bigskip

We consider the following four symmetric monoidal stacks on $\Mf$ or $\Mf_{\C-fol}$ of vector bundles with additional structures. The monoidal structure is always given by the direct sum.

\begin{enumerate}
\item For a manifold $M$
we let ${\mathbf{Vect}} (M )$ denote the category of vector bundles $V\to M$. A map $f:M^{\prime}\to M$ induces a functor $f^{*}:{\mathbf{Vect}} (M )\to {\mathbf{Vect}} (M^{\prime} )$. We get a stack ${\mathbf{Vect}} $ on the site $\Mf $ with respect to the topology of open coverings.
We use the same symbol for its pull-back to the site $\Mf_{\C-fol}$ along \eqref{ewfwfwefewfewfwfwefwfe}.
\item We let ${\mathbf{Vect}}^{\nabla} (M)$ denote the category of pairs $(V,\nabla)$ of a vector bundle $V\to M$ and a connection. A map $f: M^{\prime} \to M $ induces a functor $f^{*}:{\mathbf{Vect}}^{\nabla} (M )\to {\mathbf{Vect}}^{\nabla} (M^{\prime} )$. We get a symmetric monoidal stack ${\mathbf{Vect}}^{\nabla} $ on the site $\Mf $. We use the same symbol for its pull-back to the site $\Mf_{\C-fol}$ along \eqref{ewfwfwefewfewfwfwefwfe}.
\item For a foliated manifold $(M,\cF)$ we let ${\mathbf{Vect}}^{\flat}(M,\cF)$ denote the category of pairs $(V,\nabla^{I})$ of a vector bundle $V\to M$ and a flat partial connection $\nabla^{I}$ on $V$, see Section \ref{fhfjlwefkjfewfewfewfwf}. A foliated map $f:(M,\cF)\to (M^{\prime},\cF^{\prime})$ induces a functor $f^{*}:{\mathbf{Vect}}^{\flat}(M^{\prime},\cF^{\prime})\to {\mathbf{Vect}}^{\flat}(M,\cF)$. We get a stack ${\mathbf{Vect}}^{\flat}$ on the site $\Mf_{\C-fol}$. \item We let ${\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$ denote the category of pairs $(V,\nabla)$ of a vector bundle $V\to M$ and a connection $\nabla$ on $V$ which is flat in the direction of the foliation. A foliated map $f$ as above induces a functor $f^{*}:{\mathbf{Vect}}^{\flat,\nabla}(M^{\prime},\cF^{\prime})\to {\mathbf{Vect}}^{\flat,\nabla}(M,\cF)$. We get a symmetric monoidal stack ${\mathbf{Vect}}^{\flat,\nabla}$ on the site $\Mf_{\C-fol}$. \end{enumerate}

There is a commutative diagram of forgetful maps in $\Sh_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])} (\Mf_{\C-fol})$: \begin{equation}\label{wsdqwdwqdqwdwqdsq} \xymatrix{{\mathbf{Vect}}^{\flat,\nabla}\ar[d]\ar[r]&{\mathbf{Vect}}^{\flat}\ar[d]\\{\mathbf{Vect}}^{\nabla}\ar[r]&{\mathbf{Vect}}}\end{equation}
We now apply the $K$-theory machine $\mathcal{K}$ (see \cite[Def. 6.1]{2013arXiv1311.3188B} and Remark \ref{flwjfklefewfwfewfwfw234244}) and get a commutative diagram of presheaves of spectra \begin{equation}\label{f4fwefwefewfwf}
\xymatrix{\mathcal{K}({\mathbf{Vect}}^{\flat,\nabla}) \ar[d]\ar[r]&\mathcal{K}({\mathbf{Vect}}^{\flat}) \ar[d]\\\mathcal{K}({\mathbf{Vect}}^{\nabla})\ar[r]\ar[d]&\mathcal{K}({\mathbf{Vect}})\ar[d]\\ \hat {\mathbf{ku}}^{\nabla}\ar[r]& \hat {\mathbf{ku}} }
\end{equation}
in ${\mathbf{PSh}}_{\Sp}(\Mf_{\C-fol})$.
The upper square in \eqref{f4fwefwefewfwf} is by definition the image of \eqref{wsdqwdwqdqwdwqdsq} under $\mathcal{K}$.
The lower horizontal map is defined by applying the sheafification $L$ (see \eqref{dqdqwdwqdwqdwqdwqdqd}) to the middle horizontal arrow and the lower vertical arrows are the units of the sheafification.
In particular, we use the notation \begin{equation}\label{wqdwqddq21321}
\hat {\mathbf{ku}}^{\nabla}:=L(\mathcal{K}({\mathbf{Vect}}^{\nabla}))\ , \quad \hat {\mathbf{ku}}:=L(\mathcal{K}({\mathbf{Vect}}))\ .
\end{equation}

\begin{rem}\label{flwjfklefewfwfewfwfw234244}{\rm For the sake of the reader let us indicate some details on the $K$-theory machine $\mathcal{K}$. It is the composition \begin{eqnarray*}&&
{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])\to {\mathbf{CAlg}}(\mathbf{Groupoids}[W^{-1}])\to {\mathbf{CommMon}}(\sSet[W^{-1}])\\&&\hspace{4cm}\to {\mathbf{CommGroup}}(\sSet[W^{-1}])\simeq \Sp_{\ge 0}\to \Sp\ .\end{eqnarray*}

of the following functorial constructions:
\begin{enumerate}
\item We first take the underlying symmetric monoidal groupoid.
\item Then we apply the nerve in order to get a commutative monoid in the category of spaces $\sSet[W^{-1}]$, i.e. an $E_{\infty}$-space.
\item Then we apply the group completion functor to obtain a commutative group in spaces, i.e. a grouplike $E_{\infty}$-space.
\item Finally we apply the functor which maps a commutative group in spaces to the corresponding connective spectrum whose $\infty$-loop space is this group.
\end{enumerate}}
\end{rem}
\begin{rem}{\rm Note that the symmetric monoidal stacks ${\mathbf{Vect}}^{\nabla}$ and ${\mathbf{Vect}}$ are pulled back from stacks on the site $\Mf$ via the forgetful morphism \eqref{ewfwfwefewfewfwfwefwfe}. The same is true for the associated sheaves of $K$-theory spectra $\hat {\mathbf{ku}}^{\nabla}$ and $\hat {\mathbf{ku}}$. They represent differential versions of connective $K$-theory ${\mathbf{ku}}$ and are studied in detail in \cite[Sec. 6]{2013arXiv1311.3188B}.
}\end{rem}

\subsection{Characteristic cocycles}\label{kfjwelfewfewfewfewfewfewfe}

In order to construct the regulator we use the method introduced in \cite{Bunke:2012fk} based on the notion of characteristic cocycles.
We consider the category of chain complexes $\Ch$.
We have $$DD^{-}, DD^{per}\in \Sh_{ \Ch }(\Mf_{\C-fol})$$
introduced in Definition \ref{jkdjlqwdqwdqwd}, where here we forget the algebra structure.
Using the Chern character forms (Definitions \ref{ffwefwefewfewfwfw} and \ref{qldjqwldqwdqwdqwd}) and their naturality (equations \eqref{wqdqwdqwdwqdwqwqdqd} and \eqref{wqdqwdqwdwqdwqwqdqd1}) we get characteristic cocycles (see \cite[Def. 2.12]{Bunke:2012fk})
$$\ch^{-}:\pi_{0}({\mathbf{Vect}}^{\flat,\nabla})\to Z^{0}(DD^{-}) \ , \quad
\ch:\pi_{0}({\mathbf{Vect}}^{ \nabla})\to Z^{0}(DD^{per}) \ .$$
Here $\pi_{0}$ sends a symmetric monoidal category to its commutative monoid of isomorphism classes.
We will consider commutative monoids as symmetric monoidal categories, see Remark \ref{dkwqnmqlkwdwqdwqdqd}.
The following diagram in ${\mathbf{PSh}}_{{\mathbf{CAlg}}({\mathbf{Cat}}[W^{-1}])}(\Mf_{\C-fol})$ commutes:
\begin{equation}\label{kjwehbfjkfhekfehwfkewfef09890}
\xymatrix{\pi_{0}({\mathbf{Vect}}^{\flat,\nabla})\ar[r]\ar[d]& Z^{0}(DD^{-})\ar[d]\\\pi_{0}({\mathbf{Vect}}^{ \nabla})\ar[r]&Z^{0}(DD^{per})}\ . \end{equation}

Let $$\mathbf{H}:\Ch[W^{-1}]\to \Sp$$ denote the Eilenberg-MacLane functor (see \cite[(22)]{Bunke:2012fk}). We will use the notation
\begin{equation}\label{ewfewfewfewf444efewfewfewfwef}
\sigma^{\ge p}\mathbf{DD}^{-}:=\mathbf{H}(\sigma^{\ge p }DD^{-})\ , \quad \sigma^{\ge p}\mathbf{DD}^{per}:=\mathbf{H}(\sigma^{\ge p}DD^{per})
\end{equation} for $p\in \Z$.
Note that
$$\sigma^{\ge p}\mathbf{DD}^{-}\ , \sigma^{\ge p}\mathbf{DD}^{per}\in \Sh_{ \Sp}(\Mf_{\C-fol})$$
(see \cite[Sec. 2.3]{Bunke:2014aa}).
Applying the general construction of regulators \cite[Def. 2.14]{Bunke:2012fk}
we get the commuting diagram in ${\mathbf{PSh}}_{\Sp}(\Mf_{\C-fol})$:

\begin{equation}\label{kjwehbfjkfhekfehwfkewfef098901}
\xymatrix{\mathcal{K}({\mathbf{Vect}}^{\flat,\nabla}) \ar[r]^{r(\ch^{-})}\ar[d]& \sigma^{\ge 0}\mathbf{DD}^{-}\ar[d]\\\ \mathcal{K}({\mathbf{Vect}}^{\nabla})\ar[d]^{u}\ar[r] &\sigma^{\ge 0}\mathbf{DD}^{per}\\\hat {\mathbf{ku}}^{\nabla}\ar@{.>}[ur]_{r(\ch)}& }\ .
\end{equation}
In order to get the lower triangle we use that $\sigma^{\ge 0}\mathbf{DD}^{per}$ is a sheaf and the universal property of the unit $u$ of the sheafification.

\

\subsection{The class $[V,\nabla^{I}]^{alg}$}

Let us fix a manifold $X$. We want to consider foliations whose space of leaves is $X$. Trivial foliations of this type are obtained by taking the product of the typical leaf with $X$.
In this way we actually obtain an inclusion of manifolds into foliations. More precisely we consider the functor \begin{equation}\label{dwedewdewdwd}
j_{X}:\Mf \to \Mf_{\C-fol}\ , \quad j_{X}(P):=(P,\cF_{max})\times (X,\cF_{min})\ .
\end{equation} A manifold $Y$ also gives rise to endofunctors
\begin{equation}\label{a1}
i_{Y}:\Mf \to \Mf \ , \quad i_{Y} (P):= Y\times P\ ,
\end{equation} and \begin{equation}\label{a2}
i_{Y}:\Mf_{ \C-fol} \to \Mf_{\C-fol}\ ,\quad i_{Y} (P,\cF):=(Y,\cF_{max})\times (P,\cF)\ .
\end{equation}
\bigskip

The projection $Y\to *$ induces a morphism $\id\to i_{Y}^{*}$.

\bigskip

Let $I:=[0,1]$ denote the unit interval. Let $\bC$ be a presentable $\infty$-category. \begin{ddd}
An object $A\in \Sh_{\bC}(\Mf)$ (or $ {\mathbf{PSh}}_{\bC}(\Mf)$, $ \Sh_{\bC}(\Mf_{\C-fol})$ or $ {\mathbf{PSh}}_{\bC}(\Mf_{\C-fol})$) is called homotopy invariant if the natural morphism
$A\to i_{I}^{*} A$ is an equivalence. \end{ddd}
We indicate the full subcategories of homotopy invariant (pre)sheaves by an upper index $h$.

\bigskip

\begin{ex}{\rm By \cite[Prop. 2.6, 1.]{2013arXiv1311.3188B} (see also Lemma \ref{kfhkwjefewfewf98790234jnbkjf} below), for a homotopy invariant sheaf $\bE\in \Sh^{h}_{\bC}(\Mf)$ we have a natural equivalence
$$\underline{\bE(*)}\simeq \bE\ .$$ If $\bC=\Sp$, then for
$M\in \Mf$ and $k\in \Z$ we have a natural isomorphism of abelian groups
\begin{equation}\label{fwefewfewfewfe324}
\pi_{k}(\bE(M))\cong \bE(*)^{-k}(M)\ .
\end{equation}
Observe that a similar statement is not true for homotopy invariant sheaves on $\Mf_{\C-fol}$.}
\end{ex}

\begin{lem}\label{leelfjwelfwefewf}
The sheaf ${\mathbf{Vect}}^{\flat}$ is homotopy invariant.
\end{lem}
\proof The reason is that the foliation of $i_{I}(M,\cF)=(I\times M,T_{\C}I\boxplus \cF)$ contains the $I$-direction. For $(V,\nabla^{I})\in {\mathbf{Vect}}^{\flat}(i_{I}(M,\cF))$
we can use the flat connection $\nabla^{I}$ in order to define a parallel transport in the $I$-direction.

Hence a vector bundle $(V,\nabla^{I})$ with a flat partial connection or a morphism between two such objects over $I\times M$ is uniquely determined by the restriction to $\{0\}\times M$. \hB

We now use the fact that on the site $\Mf$ sheafification preserves homotopy invariance.
\begin{lem}\label{kfhkwjefewfewf98790234jnbkjf}
If $\mathbf{F}\in {\mathbf{PSh}}^{h}_{\bC}(\Mf)$, then $L(\mathbf{F})\simeq \underline{\mathbf{F}(*)} $. In particular $L(\mathbf{F})\in \Sh^{h}_{\bC}(\Mf)$.
\end{lem}
\proof If ${\mathcal{U}}$ is a good covering of a manifold $M$, then let $U^{\bullet} $ denote the associated simplicial manifold called the \v{C}ech nerve. We get a simplicial set $\pi_{0}(U^{\bullet})\in \sSet$. Applying the localization $\iota:\sSet\to \sSet[W^{-1}]$ we obtain a space
$ \iota \pi_{0}(U^{\bullet})\in \sSet[W^{-1}]$.

Using the inclusion $d:\Set\to \sSet$ of sets as discrete simplicial sets we can consider $d(\pi_{0}(U^{\bullet}))$ as a simplicial object in $\sSet$ which is levelwise discrete. If we apply $\iota$ levelwise, then we get a simplicial space
$\iota(d(\pi_{0}(U^{\bullet})))\in \Fun(\Delta^{op},\sSet[W^{-1}])$. Note that for every simplicial set $X$ we have a natural equivalence
\begin{equation}\label{wefewfwfewfewfwf2342343efdwef3232rd3455678}
\colim_{\Delta^{op}} \iota(d(X))\simeq \iota (X)\ .
\end{equation}
Since ${\mathcal{U}}$ was a good covering we have an equivalence
$$M_{top}\simeq \iota \pi_{0}(U^{\bullet})\stackrel{\eqref{wefewfwfewfewfwf2342343efdwef3232rd3455678}}{\simeq} \colim_{\Delta^{op}} \iota(d(\pi_{0}(U^{\bullet})))\ ,$$
where $M_{top}$ denotes the underlying space of $M$. We use this equivalence at the place marked by $!!$, and the homotopy invariance of $\mathbf{F}$ at $!$ in the following chain of equivalences: $$\lim_{\Delta} \mathbf{F}(U^{\bullet}) \stackrel{!}{\simeq} \lim_{\Delta} \mathbf{F}(*)^{\iota (d( \pi_{0}(U^{\bullet})))} \simeq \mathbf{F}(*)^{ \colim_{\Delta^{op}}\iota (d(\pi_{0}(U^{\bullet})))} \stackrel{!!}{\simeq} \mathbf{F}(*)^{M_{top}}\simeq \underline{\mathbf{F}(*)}(M)
\ .$$
Let ${\mathcal{L}}$ be the \v{C}echification operator (see \cite[Definition A.4]{Bunke:2012fk}).
Since good coverings are cofinal in the system of coverings involved in the definition of ${\mathcal{L}}$
we conclude that
$${\mathcal{L}}(\mathbf{F})(M)\simeq \underline{\mathbf{F}(*)}(M)\ .$$ Hence ${\mathcal{L}}(\mathbf{F})\simeq\underline{\mathbf{F}(*)} $ is a homotopy invariant sheaf. Using the equivalence \cite[(176)]{Bunke:2012fk} we get that
$$L(\mathbf{F})\simeq L({\mathcal{L}}(\mathbf{F}))\simeq {\mathcal{L}}(\mathbf{F})\simeq \underline{\mathbf{F}(*)}\ .$$
\hB

We define $${\mathbf{K}}_{X}:=L(j_{X}^{*} \mathcal{K}({\mathbf{Vect}}^{\flat}))\in \Sh^{h}_{\Sp}(\Mf)\ .$$
Note that $j_{X}^{*}$ preserves homotopy invariance and the sheaf condition. By Lemmas \ref{leelfjwelfwefewf}
and \ref{kfhkwjefewfewf98790234jnbkjf} we see that ${\mathbf{K}}_{X}$ is indeed a homotopy invariant sheaf.

\bigskip

We have a chain of equivalences of symmetric monoidal categories $$j_{X}^{*}{\mathbf{Vect}}^{\flat}(*)\simeq {\mathbf{Vect}}^{\flat}(X,\cF_{min})\simeq {\mathbf{Vect}}(X)\simeq \mathbf{Proj}(C^{\infty}(X))\ ,$$
where the first two are obtained by specializing definitions, and the last is Swan's theorem.
This implies
\begin{equation}\label{fefwefwefewf234234sdfsdf}
{\mathbf{K}}_{X}(*)= \mathcal{K}(\mathbf{Proj}(C^{\infty}(X)))\stackrel{def}{=}{\mathbf{K}}(C^{\infty}(X))\ ,
\end{equation}
where the last equality is our definition of the connective algebraic $K$-theory spectrum of the ring $C^{\infty}(X)$.

We can now give the technical definition of the class $[V,\nabla^{I}]^{alg}\in {\mathbf{K}}(C^{\infty}(X))^{0}(P)$ for a pair $$(V,\nabla^{I})\in {\mathbf{Vect}}^{\flat}(P\times X,T_{\C}P\oplus 0)\ .$$ Indeed, we have
$(V,\nabla^{I})\in j_{X}^{*}{\mathbf{Vect}}^{\flat}(P)$. This object naturally represents a point in $\Omega^{\infty} {\mathbf{K}}_{X}(P)$.
\begin{ddd}\label{xgrergegg} We define \begin{equation}\label{fwefwefwefffefefewfewfewff324234}
[V,\nabla^{I}]^{alg}\in \pi_{0}({\mathbf{K}}_{X}(P))\stackrel{\eqref{fwefewfewfewfe324}}{\cong} {\mathbf{K}}_{X}(*)^{0}(P) \stackrel{\eqref{fefwefwefewf234234sdfsdf}}{\cong} {\mathbf{K}}(C^{\infty}(X))^{0}(P)
\end{equation}
to be the connected component represented by the point $[V,\nabla^{I}]$.\end{ddd}

\subsection{Differential $K$-theory and the regulator map}

We assume that $\bC$ is a stable presentable $\infty$-category like spectra $\Sp$ or chain complexes $\Ch[W^{-1}]$.
We have an adjunction
$$\cH:\Sh_{\bC}(\Mf )\leftrightarrows \Sh^{h}_{\bC}(\Mf ):inclusion\ ,$$
where $\cH$ is called homotopification. By \cite[Prop. 7.6.(2)]{2013arXiv1311.3188B} it is given by a composition
$\cH\simeq L\circ \cH^{pre}$, where $\cH^{pre}:\Sh_{\bC}(\Mf )\to {\mathbf{PSh}}^{h}_{\bC}(\Mf)$ is given by
\begin{equation}\label{fewfwfewfewfewfewfewfewfewfewf}
\cH^{pre} \simeq \colim_{\Delta^{op}} i_{\Delta^{\bullet}}^{*}
\end{equation}
using the notation \eqref{a1}.
Similarly, for the site $\Mf_{\C-fol}$ we have an adjunction \begin{equation}\label{ggg889899898893443}
\cH^{\flat}:\Sh_{\bC}(\Mf_{\C-fol} )\leftrightarrows \Sh^{h}_{\bC}(\Mf_{\C-fol} ):inclusion\ ,
\end{equation} where $\cH^{\flat}=L\circ \cH^{\flat,pre}$ with $\cH^{\flat, pre}$ given again by \eqref{fewfwfewfewfewfewfewfewfewfewf}, but now using \eqref{a2}.
For a manifold $X$ the functor $j_{X}^{*}$ (see \eqref{dwedewdewdwd}) preserves homotopy invariant sheaves. Moreover, if $X$ is compact, then we have \begin{equation}\label{refwefwefewfew45355321343241325465}
j_{X}^{*}\circ \cH^{\flat}\simeq \cH\circ j_{X}^{*}
\end{equation}
(compare \cite[Lemma 2.4 (4)]{Bunke:2014aa} for a proof of a similar statement).

\begin{lem} \label{iewfwefewfewfewf}The sheaves $\mathbf{DD}^{per}$ and $\mathbf{DD}^{-}$ are homotopy invariant. Moreover, for every $p\in \Z$ the inclusions
$$\sigma^{\ge p} \mathbf{DD}^{-}\to \mathbf{DD}^{-}\ , \quad \sigma^{\ge p} \mathbf{DD}^{per}\to \mathbf{DD}^{per}$$ are equivalent to the units of the homotopification.
\end{lem} \proof We start with the case of the map $\sigma^{\ge p} \mathbf{DD}^{per}\to \mathbf{DD}^{per}$ between sheaves on $\Mf$.
Recall the definition \eqref{ewfewfewfewf444efewfewfewfwef}. We let $$\iota:\Ch\to \Ch[W^{-1}]$$ be the canonical localization map.
We have $$\sigma^{\ge p}DD^{per}\cong \prod_{q\in \Z} (\sigma^{\ge p+2q}\Omega)[2q]\ .$$
We discuss the factors separately.
By \cite[Lemma 7.15]{2013arXiv1311.3188B} the map
$$\iota (\sigma^{\ge p+2q}\Omega)[2q] \to \iota( \Omega)[2q]$$ is the unit of the homotopification.
This implies the assertion for $ \mathbf{DD}^{per}$ after applying the Eilenberg-MacLane functor $\mathbf{H}$.

\bigskip

We now discuss $DD^{-}$. We first observe that
$\iota (DD^{-})$ is a homotopy invariant sheaf on the site $\Mf_{\C-fol}$ with values in $\Ch[W^{-1}]$.
We again consider one factor of $$ DD^{-}\cong \prod_{q\in \Z} F^{q}\Omega [2q]$$ at a time. For a foliated manifold $(M,\cF)$ the integration $\int_{I\times M/M}$ preserves the filtration and induces a map
$$\int_{I\times M/M} F^{q}\Omega(I\times M,T_{\C}I\boxplus \cF) \to F^{q}\Omega(M, \cF)[ -1]$$ such that
$$d\int_{I\times M/M} x=x_{|\{1\}\times M}-x_{|\{0\}\times M}\ .$$
This implies that $\iota (F^{q}\Omega )$ is homotopy invariant.

\begin{rem}{\rm The point here is that we define homotopy invariance along the leaf direction.
If we would include transverse directions, then the integral would not preserve the filtration. In this case we only have
$$\int_{I\times M/M} F^{p}\Omega(I\times M, \{0\}\boxplus\cF) \to F^{p-1}\Omega(M, \cF)[ -1]\ .$$
In this case the integration would not be defined on $DD^{-}$.
}
\end{rem}

Once we know that $\iota (F^{q}\Omega) \in \Sh_{\Ch[W^{-1}]}(\Mf_{\C-fol})$ is homotopy invariant, we show that $$\iota( \sigma^{\ge p} F^{q}\Omega) \to \iota (F^{q}\Omega) $$
is the unit of the homotopification exactly as in \cite[Lemma 7.15]{2013arXiv1311.3188B}.
Note that by (the analog of) \cite[Lemma 7.13]{2013arXiv1311.3188B} $\cH^{\flat}(\iota (F^{q}\Omega)^{\ell})=0$ for every $\ell\in \Z$.
This implies as in the proof of \cite[Lemma 7.15]{2013arXiv1311.3188B} that
$\cH^{\flat}(\iota (\sigma^{<p} F^{q}\Omega) )=0$. The claim now follows from an application of $\cH^{\flat}\circ \iota$ to the exact sequence of $\Ch$-valued sheaves
$$0\to\sigma^{\ge p} F^{q}\Omega \to F^{q}\Omega \to \sigma^{<p} F^{q}\Omega \to 0\ . $$

\hB

We define $${\mathbf{K}}^{\nabla}_{X}:=L(j_{X}^{*} \mathcal{K}({\mathbf{Vect}}^{\flat,\nabla}))\ .$$

\begin{lem}\label{kljldqwdqwdwqdd}
The morphisms $$ {\mathbf{K}}_{X}^{\nabla}\to {\mathbf{K}}_{X}\ , \quad \hat {\mathbf{ku}} \to \underline{{\mathbf{ku}}}\ , \quad \hat {\mathbf{ku}}^{\nabla}\to \underline{{\mathbf{ku}}}$$
are equivalent to the units of the homotopification.
\end{lem}
\proof The second and the third cases are consequences of \cite[Lemma 6.3]{2013arXiv1311.3188B}
and \cite[Lemma 6.5]{2013arXiv1311.3188B}. It remains to discuss the first case.
We know that ${\mathbf{K}}_{X}$ is homotopy invariant. Then the assertion now follows from the analog of
\cite[Lemma 6.4]{2013arXiv1311.3188B} for ${\mathbf{Vect}}^{\flat,\nabla}\to{\mathbf{Vect}}^{\flat}$. \hB

From \eqref{kjwehbfjkfhekfehwfkewfef098901} and the fact that the two objects on the right and the lower left corner are sheaves we get the diagram
\begin{equation}\label{kjwehbfjkfhekfehwfkewfeeedef098901}
\xymatrix{{\mathbf{K}}^{\nabla}_{X} \ar[r]^{j_{X}^{*}r(\ch^{-})}\ar[d]& j_{X}^{*}\sigma^{\ge 0}\mathbf{DD}^{-}\ar[d]\\ j_{X}^{*}\hat {\mathbf{ku}}^{ \nabla}\ar[r]^{j_{X}^{*}r(\ch)}&j_{X}^{*}\sigma^{\ge 0}\mathbf{DD}^{per}}\ .
\end{equation}
We now assume that $X$ is compact. Then by \eqref{refwefwefewfew45355321343241325465}
homotopification commutes with $j_{X}^{*}$.
Applying homotopification to this square and using Lemmas \ref{iewfwefewfewfewf}, \ref{kljldqwdqwdwqdd} we get the square \begin{equation}\label{r223r23r32r32r324}
\xymatrix{{\mathbf{K}}_{X} \ar[r]^{\omega^{-}_{X}}\ar[d]& j_{X}^{*} \mathbf{DD}^{-}\ar[d]\\ j_{X}^{*}\underline{{\mathbf{ku}}} \ar[r]^{\omega_{X}}& j_{X}^{*} \mathbf{DD}^{per}}\ . \end{equation}

We consider the following three versions of Hopkins-Singer type (see \cite{MR2192936} for the original definition and \cite{2013arXiv1311.3188B} for more information) differential algebraic and differential $K$-theories for $p\in \Z$
$$\xymatrix{\hat {\mathbf{K}}_{X }^{p} \ar[d]^{I}\ar[r]&j_{X}^{*}\sigma^{\ge p}\mathbf{DD}^{-}\ar[d]\\ {\mathbf{K}}_{X } \ar[r]^{\omega^{-}_{X}}&j^{*}_{X}\mathbf{DD}^{-}}\ , \quad \xymatrix{\hat {\mathbf{ku}}_{X}^{\flat,p}\ar[d]\ar[r]^{R}&j_{X}^{*}\sigma^{\ge p}\mathbf{DD}^{-}\ar[d]\\ j_{X}^{*}\underline{{\mathbf{ku}}}\ar[r]^{\omega_{X}}&j_{X}^{*}\mathbf{DD}^{per}}\ ,
\quad
\xymatrix{\hat {\mathbf{ku}}^{p}\ar[d]\ar[r]&\sigma^{\ge p}\mathbf{DD}^{per}\ar[d]\\\underline{{\mathbf{ku}}}\ar[r]&\mathbf{DD}^{per}}$$
defined by the respective pull-back square in $\Sh_{\Sp}(\Mf)$. We define the corresponding differential cohomology groups by
$$\hat K_{X}^{p}(P):=\pi_{-p}(\hat {\mathbf{K}}_{X }^{p} (P))\ , \quad \widehat{ku}_{X}^{\flat,p}(P):=\pi_{-p}(\hat {\mathbf{ku}}_{X }^{\flat, p} (P))\ , \quad
\widehat{ku}^{p}(P):=\pi_{-p}(\hat {\mathbf{ku}}^{p}(P))\ .$$

The square \eqref{r223r23r32r32r324} together with the obvious commutative square \begin{equation}\label{hjkdekdhewkdjhewkdewd}
\xymatrix{\sigma^{\ge p}\mathbf{DD}^{-} \ar[r]\ar[d]& \mathbf{DD}^{-}\ar[d]\\ \sigma^{\ge p}\mathbf{DD}^{per} \ar[r]& \mathbf{DD}^{per}}\end{equation}
induces a chain of morphisms
$$\hat {\mathbf{K}}_{X}^{p}\to \hat {\mathbf{ku}}_{X}^{\flat,p}\to j^{*}_{X}\hat {\mathbf{ku}}^{p}\ .$$
Using \eqref{kjwehbfjkfhekfehwfkewfeeedef098901} we finally get the square
$$\xymatrix{ {\mathbf{K}}_{X}^{\nabla} \ar[r]^{cycl}\ar[d]& \hat {\mathbf{K}}_{X}^{0}\ar[d]\\ j_{X}^{*}\hat {\mathbf{ku}}^{\nabla} \ar[r]^{cycl}&j_{X}^{*}\hat {\mathbf{ku}}^{0}}$$ where the horizontal maps are the differential cycle maps.

\bigskip

The following exact sequences are part of the general features of a Hopkins-Singer differential cohomology. The sequence
\begin{equation}\label{} \dots\to DD^{-}(P\times X,T_{\C}P\boxplus\{0\})^{\ell-1} /\im(d) \stackrel{a}{\to} \hat K_{X}^{\ell}(P)\stackrel{I}{\to} {\mathbf{K}}_{X}(*)^{\ell}(P)\to 0
\end{equation}
describes the set of possible differential lifts of topological classes. The second sequence
\begin{equation}\label{} 0\to \widehat{ku}^{\flat,\ell}_{X,flat}(P)\to \widehat{ku}^{\flat,\ell}_{X}(P)\stackrel{R}{\to} Z^{\ell}(DD^{-}(P\times X,T_{\C}P\boxplus\{0\}))\to\dots
\end{equation}
reflects the definition of the flat subgroup.

\bigskip

We consider the case $P=*$, $\ell:=-p$ and assume that $\dim(X)<p$. In this case it is straightforward to check that $DD^{-}(X,\cF_{min})^{-p-1}=0$ and $DD^{-}(X,\cF_{min})^{-p}=0$.

This implies the isomorphisms $$I:\hat K_{X}^{-p}(*)\stackrel{\cong}{\to} {\mathbf{K}}_{X}(*)^{-p}\ , \quad \widehat{ku}_{X,flat}^{\flat,-p}(*)\stackrel{\cong}{\to} \widehat{ku}_{X}^{\flat,-p}(*)\ .$$
\begin{ddd}\label{klfwefewfewfwf} For $p\in \nat$ such that $\dim(X)<p$ we define the regulator map
${\tt reg}_{X}$ as the composition
$$\hspace{-1cm}K_{p}(C^{\infty}(X))\cong {\mathbf{K}}_{X}(*)^{-p}\stackrel{\cong}{\leftarrow} \hat K_{X}^{-p}(*) \to \widehat{ku}_{X}^{\flat,-p} (*)\stackrel{\cong}{\rightarrow} \widehat{ku}^{\flat,-p}_{X,flat} (*)
\to \widehat{ku}^{-p}_{flat}(X,\cF_{min})\cong {\mathbf{ku}}\C/\Z^{-p-1}(X)$$
\end{ddd}

In Remark \ref{2ddhi3dhio32doi2doo2oidjud2} we will explain how this regulator can be obtained by specializing a more basic regulator.

\begin{rem}{\rm In \cite[Thm 1.1]{Bunke:2014aa} we defined a similar regulator map
$$\sigma_{p}:K_{p}(C^{\infty}(X))\to {\mathbf{ku}}\C/\Z^{-p-1}(X)$$
using different methods. While here, in order to define the Chern character, we use characteristic forms associated to connections, in \cite{Bunke:2014aa} we use the Goodwillie-Jones Chern character.
The two Chern characters equivalent as primary invariants \cite[Lemma 2.27]{Bunke:2014aa}. In order to compare the two regulator maps $\sigma_{d}$ and ${\tt reg}_{X}$ we would need to compare the two Chern characters on the space level. So at the moment it remains an open question whether $\sigma_{d}={\tt reg}_{X}$.
}\end{rem}

\subsection{Integration and proof of Theorem \ref{flkfefwefwefewfef}}\label{fwklfwfewfewfewf}

We now assume that $P$ is closed and has a stable framing $s$. Then $f:P\to *$ has a natural differential orientation $\hat o_{s}$ (see \cite[Example 4.230]{2012arXiv1208.3961B}) and we have an associated Umkehr map in every Hopkins-Singer differential cohomology theory.
We further assume that $X$ is closed, spin and equipped with a Riemannian metric. This induces a differential ${\mathbf{ku}}$-orientation $\hat o$ of the projection $\pi:X\to *$, see Subsection \ref{wlekfjwelfjewlfwef123}.

Let $p:=\dim(P)$ and $d:=\dim(X)$. We have the commutative diagram
\begin{equation}\label{dewewdedewdewd2342343}
\xymatrix{ &&\widehat{KU}^{0}(P\times X)\ar[r]^{(\pi\circ f)^{\hat o\circ \hat o_{s}}_{!}}&\widehat{KU}^{-p-d}(*)\ar[r]^{\cong}&\C/\Z\ar@{=}[d]\\
\pi_{0}({\mathbf{K}}_{X}^{\nabla})\ar@/^3cm/[rrrru]^{(V,\nabla^{V})\mapsto \rho(M,\cF,\nabla^{I},s)} \ar[urr]^{(V,\nabla)\mapsto [V,\nabla]}\ar[d]_{(V,\nabla^{V})\mapsto [V,\nabla^{I}]^{alg}}\ar[r]^{cycl} &\hat K_{X}^{0}(P)\ar[r]\ar[dl]\ar[d]^{f_{!}^{\hat o_{s}}}\ar@{}[dr]^{\textcircled{2}}&\widehat{ku}^{0}(P\times X) \ar@{}[ur]^{\textcircled{2}}\ar[d]^{\hat f^{\hat o_{s}\times X}_{!}}\ar@{}[dr]^{\textcircled{1}}\ar[u]\ar[r]^{(\pi\circ f)^{\hat o\circ \hat o_{s}}_{!}}&\widehat{ku}^{-p-d}(*) \ar[u]\ar@{=}[d]\ar[r]^{\cong}&\C/\Z\ar@{=}[d]\\
{\mathbf{K}}_{X}(*)^{0}(P)\ar@{}[r]^{\textcircled{3}} \ar[d]^{f_{!}^{o_{s}}}&\ar[dl]^{\cong}\hat K^{-p}_{X}( *) \ar@{}[dr]^{\textcircled{4}}\ar[r] &\widehat{ku}^{-p }(X) \ar[r]^(0.5){\pi_{!}^{\hat o}} &\widehat{ku}^{-d-p}(*) \ar[r]^{\cong}&\C/\Z
\\ {\mathbf{K}}_{X}(*)^{-p} \ar[rr]^{{\tt reg}_{X}}
& &{\mathbf{ku}}\C/\Z^{-p-1}(X)\ar[r]^{\pi_{!}^{o}} \ar[u]\ar@{}[ur]^{\textcircled{5}}&{\mathbf{ku}}\C/\Z^{-d-p-1}(*)\ar[u]_{\cong } & }
\end{equation}
The square $\textcircled{1}$ commutes by the ${\mathbf{ku}}$-analog of \eqref{e23e23e3e23e32e32e2}.
For the squares $\textcircled{2}$ we use that integration commutes with transformations between Hopkins-Singer differential cohomology theories provided the orientations are related correspondingly.
For the square $\textcircled{3}$ we use the right-most square of the ${\mathbf{ku}}$-analog of \eqref{r23r23r23r23r23r235435346546}.
The square $\textcircled{4}$ commutes by the definition of the regulator. For $\textcircled{5}$
we use that the identification of the flat subgroup in a Hopkins-Singer differential cohomology with the $\C/\Z$-version of the underlying cohomology theory is compatible with integration, i.e. the left-most square in the ${\mathbf{ku}}$-analog of \eqref{r23r23r23r23r23r235435346546}.

\bigskip

The upper composition in \eqref{dewewdedewdewd2342343} maps $(V,\nabla^{V})$, essentially by definition, to $\rho(M,\cF,\nabla^{I},s)$ as indicated. The down-right composition sends $(V,\nabla^{V})$ to
$$\pi_{!}^{o}({\tt reg}_{X}(f_{!}^{o_{s}}([V,\nabla^{I}]^{alg})))\ .$$
Thus Theorem \ref{flkfefwefwefewfef} follows from the commutativity of \eqref{dewewdedewdewd2342343}. \hB

\section{Algebraic $K$-theory of foliations}\label{dkqwldqwdwqdwqdwqdwqd}

In this section we define the algebraic $K$-theory sheaf ${\mathbf{K}}$ on $\Mf_{\C-fol}$. Its homotopy groups $$K^{*}(M,\cF):=\pi_{-*}({\mathbf{K}}(M,\cF)) $$ can be considered as the algebraic $K$-theory groups of the foliation $(M,\cF)$. We further introduce the Hodge-filtered connective $K$-theory sheaf $ {\mathbf{ku}}^{\flat}$ and define a regulator
$${\tt reg}:{\mathbf{K}}\to {\mathbf{ku}}^{\flat}\ .$$ For $p>\mathrm{codim}(\cF)$ it induces a map
$$\tilde {\tt reg}:K^{-p}(M,\cF)\to {\mathbf{ku}}\C/\Z^{-p-1}(M)$$
which generalizes the regulator introduced in Definition \ref{klfwefewfewfwf}.

\bigskip

\begin{rem}{\rm This section has a considerable overlap with the work of Karoubi \cite{karoubi43}, \cite{karoubi45}.
We add this section to the present paper since it fits well with the set-up developed here and puts the regulator in its natural framework.
We will study this regulator and examples elsewhere.
}
\end{rem}

We will use the notation introduced in Subsection \ref{klfwjlefewfewf}. In particular ${\mathbf{Vect}}^{\flat}$ and ${\mathbf{Vect}}^{\flat,\nabla}$ denote the symmetric monoidal stacks of pairs $(V,\nabla^{I})$ and $(V,\nabla)$ of complex vector bundles and flat partial connections, or complex vector bundles and connections whose restriction to the foliation is flat, respectively.
The symbols $L$ and $\cH^{\flat}$ denote the sheafification and the homotopification operations.

\begin{ddd} We define sheaves of spectra
$$ {\mathbf{K}} := \cH^{\flat}(L(\mathcal{K}({\mathbf{Vect}}^{\flat}))) \in \Sh^{h}_{\Sp}(\Mf_{\C-fol})\ , \quad {\mathbf{K}}^{\nabla} := L(\mathcal{K}({\mathbf{Vect}}^{\flat,\nabla})) \in \Sh_{\Sp}(\Mf_{\C-fol})\ .$$ For $p\in \Z$ we define the algebraic $K$-theory of a foliated manifold $(M,\cF)$ by
$$ K^{p}(M,\cF):=\pi_{-p}( {\mathbf{K}} (M,\cF))\ .$$
\end{ddd}
\begin{rem}\rm {Note that $\mathcal{K}({\mathbf{Vect}}^{\flat})$ is homotopy invariant.
We expect that the sheafification preserves homotopy invariance so that the homotopification is not really necessary in this definition.
}\end{rem}

In order to motivate this definition let us discuss special cases.

\begin{ex}{\rm Recall the functor $j:=j_{*}:\Mf\to \Mf_{\C-fol}$ given by $$j(M):=(M,\cF_{max})\ ,$$ see \eqref{dwedewdewdwd}. Let ${\mathbf{K}}(\C)$ denote the connective algebraic $K$-theory spectrum of the field $\C$.
\begin{lem} \label{fwekflewfwfewfw} We have an equivalence $j^{*}{\mathbf{K}}\simeq \underline{{\mathbf{K}}(\C)}$ \end{lem} \proof Since
$j (I\times M)\cong (I, T_{\C}I)\times j(M)$ we conclude that $j^{*}$ preserves homotopy invariant sheaves. Since ${\mathbf{K}}$ is homotopy invariant the sheaf
$j^{*}{\mathbf{K}}$ is homotopy invariant. Therefore (see \cite[Prop. 2.6, 1.]{2013arXiv1311.3188B}) we have an equivalence
$$j^{*}{\mathbf{K}}\simeq \underline{(j^{*}{\mathbf{K}})(*)}\ .$$
If $\bE$ is a presheaf of spectra on $\Mf_{\C-fol}$ and $L$ is the sheafification \eqref{dqdqwdwqdwqdwqdwqdqd}, then we have a natural equivalence of spectra $L(\bE)(*)\simeq \bE(*)$.
Consequently,
$$(j^{*}{\mathbf{K}})(*)\simeq \mathcal{K}({\mathbf{Vect}}^{\flat}(*,\cF_{max}))\ .$$ The category ${\mathbf{Vect}}^{\flat}(*,\cF_{max}) $ is the category of finite-dimensional complex vector spaces. Consequently we have an equivalence of spectra $$\mathcal{K}({\mathbf{Vect}}^{\flat}(*,\cF_{max}))\simeq {\mathbf{K}}(\C)\ .$$
The combination of these equivalences gives the assertion of the Lemma.
\hB

\bigskip

As a consequence of Lemma \ref{fwekflewfwfewfw} we have for a manifold $M$ \begin{equation}\label{cejkwdjejdhewdhewkdei832e32e32e32e2e}
K^{*}(M,\cF_{max})\cong {\mathbf{K}}(\C)^{*}(M)\ .
\end{equation}

}
\end{ex}

\begin{ex}\label{kjffjewfewjkfhewkjfewfkewfhewf87z}{\rm We have a natural functor
$\kappa:\Mf\to \Mf_{\C-fol}$ which is given by $M\mapsto (M,\cF_{min})$.
On the site $\Mf$ we have the differential cohomology theory $\hat {\mathbf{ku}}$, see \eqref{wqdwqddq21321} and \cite{2013arXiv1311.3188B}.
We have an equivalence of sheaves of spectra on $\Mf$ $$\kappa^{*}{\mathbf{K}}\simeq \hat {\mathbf{ku}}\ .$$
Consequently, $$K^{*}(M,\cF_{min})\cong \widehat{ku}^{*}(M)\ .$$

}
\end{ex}

\begin{ex}{\rm For a fixed manifold $P$ there is a natural map \begin{equation}\label{dkejwdlewdewded}
{\mathbf{K}}_{X}(P)\to {\mathbf{K}}(P\times X,T_{\C}P\boxplus\{0\})\end{equation}
which is natural in $X$. It is essentially the sheafification morphism in the direction of $X$.
The spectrum valued functor $$P\mapsto {\mathbf{K}}(P\times X, T_{\C}P\boxplus 0)$$ is a homotopy invariant sheaf on $\Mf$.
Since
$${\mathbf{K}}(\{*\}\times X, T_{\C}\{0\}\boxplus 0)\simeq \hat {\mathbf{ku}}(X)$$ it is therefore, by \cite[Prop. 2.6, 1.]{2013arXiv1311.3188B}, equivalent to $\underline{\hat {\mathbf{ku}}(X)}$. We thus get a map
$$ K_{X}^{*}(P)\to K^{*}(P\times X, T_{\C}P\boxplus 0)\cong \hat{{\mathbf{ku}}}(X)^{*}(P)\ .$$
}
\end{ex}

\begin{ex}\label{kdjqwkldjwqldjwqldwqd}{\rm Assume that $X$ is a smooth complex algebraic variety and let $X^{an}$ be its associated complex manifold with foliation $\cF:=T^{0,1}X^{an}$. Then we can consider the algebraic $K$-theory spectrum ${\mathbf{K}}^{alg}(X)$ of $X$. It is defined like ${\mathbf{K}}(M,\cF)$ as the sheafification of the presheaf $X\supset U\mapsto \mathcal{K}({\mathbf{Vect}}^{alg}(U))$, where ${\mathbf{Vect}}^{alg}(U)$ is the symmetric monoidal category of algebraic vector bundles on the Zariski open subset $U$. Since the analytic topology of $X^{an}$ refines the Zariski topology of $X$ the transformations ${\mathbf{Vect}}^{alg}(U)\to {\mathbf{Vect}}(U^{an})$ induce a map
$${\mathbf{K}}^{alg}(X)\to {\mathbf{K}}(X^{an},T^{0,1}X)\ .$$
This example justifies calling ${\mathbf{K}}(M,\cF)$ the algebraic $K$-theory spectrum of the foliated manifold $(M,\cF)$.
}
\end{ex}

\begin{ex}{\rm If $(V,\nabla^{I})$ is a complex vector bundle with flat partial connection on a foliated manifold $(M,\cF)$, then we get a class
$$[V,\nabla^{I}]^{alg}\in K^{0}(M,\cF)\ .$$
Similarly, if $\nabla$ is a connection which extends $\nabla^{I}$, then we get a class
$$[V,\nabla]^{alg}\in \pi_{0}({\mathbf{K}}^{\nabla}(M,\cF))\ .$$}
\end{ex}

\bigskip

From \eqref{kjwehbfjkfhekfehwfkewfef098901} and the fact that the objects on the right and the lower left corner are sheaves we get the diagram
\begin{equation}\label{kjwehbfjkfhekfehwfkewfeeedef098901rrr}
\xymatrix{{\mathbf{K}}^{\nabla} \ar[r]^{ r(\ch^{-})}\ar[d]& \sigma^{\ge 0}\mathbf{DD}^{-}\ar[d]\\ \hat {\mathbf{ku}}^{ \nabla}\ar[r]^{ r(\ch)}& \sigma^{\ge 0}\mathbf{DD}^{per}}\ .
\end{equation}
Applying homotopification to this square and using the Lemmas \ref{iewfwefewfewfewf} and \ref{kljldqwdqwdwqdd}
we get the square \begin{equation}\label{r223r23r32r32r324rrr}
\xymatrix{{\mathbf{K}} \ar[r]^{\omega^{-}}\ar[d]& \mathbf{DD}^{-}\ar[d]\\ \underline{{\mathbf{ku}}} \ar[r]^{\omega}& \mathbf{DD}^{per}}\ . \end{equation}

\begin{ddd}
We define the Hodge-filtered connective complex ${\mathbf{ku}}$-theory sheaf ${\mathbf{ku}}^{\flat}$ on $\Mf_{\C-fol}$ by the pull-back square \begin{equation}\label{ffewfewfwefewfewf234}
\xymatrix{ {\mathbf{ku}}^{\flat}\ar[d]\ar[r]&\mathbf{DD}^{-}\ar[d]\\
\underline{{\mathbf{ku}}}\ar[r]^{\omega}&\mathbf{DD}^{per}}\ .
\end{equation}
We let $$ ku^{\flat,p}(M,\cF):=\pi_{-p}( {\mathbf{ku}}^{\flat}(M,\cF))$$ be the corresponding Hodge-filtered ${\mathbf{ku}}$-theory groups of $(M,\cF)$.
\end{ddd}

\begin{rem}{\rm In \cite{karoubi43}, \cite{karoubi45} Karoubi introduced, starting from a filtration of the de Rham complex, the multiplicative K-theory $\mathbf{MK}$. Applied to the filtration (Definition \ref{ilfjewlfwfewfewfewfwfw}) coming from a foliation the multiplicative K-theory groups $\mathbf{MK}^{*}(M,\cF)$ are the Hodge-filtered $\mathbf{KU}$-theory groups of $(M,\cF)$. In other words, $ku^{\flat,*}$ is the connective $K$-theory analog of Karoubi's multiplicative $K$-theory. If one applies the functor $\Omega^{\infty}$ to \eqref{ffewfewfwefewfewf234}, then one obtains a pull-back square of sheaves of spaces which is the analog of the square just before the statement of Theorem 7.3 in \cite{karoubi45}. The fact that ${\mathbf{ku}}^{\flat}$ is a sheaf of spectra implies a Mayer-Vietoris type sequence for an open decomposition of a foliated manifold. This is Karoubi's theorem \cite[Thm. 7.7]{karoubi45}.

For a justification to use the term {\em Hodge-filtered...} instead of {\em multiplicative...}
see Remark \ref{jhdjkhkjqwdhqwkjdhwqkjdqwdwqd}.

}
\end{rem}

\begin{rem}\label{jhdjkhkjqwdhqwkjdhwqkjdqwdwqd}{\rm The Hodge-filtered connective complex ${\mathbf{ku}}$-theory ${\mathbf{ku}}^{\flat}$ is the ${\mathbf{ku}}$-theory analog of the integral Deligne cohomology which would be the Hodge filtered version of $H\Z$. While integral Deligne cohomology is the natural target for cycle maps from Chow groups of algebraic cycles, ${\mathbf{ku}}^{\flat}$ is the natural target of the regulator from algebraic $K$-theory. In \cite{MR3335251} the authors defined for every spectrum over $\mathbf{H} \Z$ a Hodge-filtered version. In an analogous manner, replacing integral Deligne cohomology by ${\mathbf{ku}}^{\flat}$ one could construct Hodge filtered cohomology theories for spectra over ${\mathbf{ku}}$. Observe that
${\mathbf{ku}}^{\flat}$ is the Hodge-filtered version associated to the identity ${\mathbf{ku}}\to {\mathbf{ku}}$. This fact motivates the name.
}
\end{rem}
In view of Lemma \ref{iewfwefewfewfewf} the sheaf
$ {\mathbf{ku}}^{\flat}$ is homotopy invariant (compare \cite[Thm 4.8]{karoubi43}). This fact is reflected in our notation by not using a $\hat{(\dots)}$-decoration.

\bigskip

\begin{ddd}\label{ojlfewfefwefw234234}
We define the regulator ${\tt reg}:{\mathbf{K}}\to {\mathbf{ku}}^{\flat}$ to be the morphism induced by the square \eqref{r223r23r32r32r324rrr} and the universal property of the pull-back square \eqref{ffewfewfwefewfewf234}.
\end{ddd}

\begin{rem}
{\rm Such a regulator has first been defined in \cite[Sec.4]{karoubi45}. Karoubi's regulator provides a factorization
$${\mathbf{K}}\to \mathbf{MK}\to \mathbf{KU}$$
of the map from algebraic to topological $K$-theory.
Our analog is
$${\mathbf{K}}\to {\mathbf{ku}}^{\flat}\to {\mathbf{ku}}\ .$$

}
\end{rem}

\bigskip

\begin{rem}{\rm The map
$${\tt reg}:{\mathbf{K}}\to {\mathbf{ku}}^{\flat}$$ could be considered as a foliated and integral analog of Beilinson's regulator.
In order to see this we show that the classical Beilinson regulator can be factored over the regulator ${\tt reg}$ defined above.

We first interpret real Deligne cohomology as Hodge-filtered ${\mathbf{ku}}\R$-theory. Here as usual, we write ${\mathbf{ku}}\R:={\mathbf{ku}}\wedge M\R$ for the product of ${\mathbf{ku}}$ with the Moore spectrum of $\R$.
The Chern character induces an equivalence of spectra $${\mathbf{ku}}\R\simeq \prod_{p\ge 0}\mathbf{H}\R[2p]\ .$$
The de Rham equivalence $\underline{\mathbf{H}\R}\simeq \mathbf{H}\Omega_{\R}$ provides the second equivalence in the composition
$$\underline{{\mathbf{ku}}\R} \simeq \underline{\prod_{p\ge 0}\mathbf{H}\R[2p]}\simeq \prod_{p\ge 0} \mathbf{H}\Omega_{\R}[2p]\to \mathbf{DD}^{per}\ ,$$
where the last map is the natural inclusion. This composition provides the lower horizontal map in the pull-back square in $\Sh_{\Sp}(\Mf_{\C-fol})$ \begin{equation}\label{wefwefewfewfewfewfw}
\xymatrix{\mathbf{H}_{\R,Del}\ar[d]\ar[r]&\mathbf{DD}^{-}\ar[d]\\
\underline{{\mathbf{ku}}\R}\ar[r]&\mathbf{DD}^{per}}
\end{equation}
which defines $$\mathbf{H}_{\R,Del}\in \Sh^{h}_{\Sp}(\Mf_{\C-fol})\ .$$ On the one hand, this is the Hodge-filtered version of ${\mathbf{ku}}\R$-theory. On the other hand it is a generalization of real Deligne cohomology to foliated manifolds. In fact, for a smooth complex algebraic variety $X$ we have a natural isomorphism \begin{equation}\label{frff877823r87786r8723r32r32r}
\pi_{*}(\mathbf{H}_{\R,Del} (X^{an},T^{0,1}X))\cong \prod_{p\in \nat} H_{Del,an}^{2p-*}(X^{an},\R(p))\ .
\end{equation}
Note that $\mathbf{H}_{\R,Del} $ does not involve the weight-filtration and therefore reflects the ``wrong'' Hodge filtration on $H^{*}(X^{an};\C)$ for non-proper $X$.

\bigskip

The natural map ${\mathbf{ku}}\to {\mathbf{ku}}\R$ induces a morphism of pull-back squares $\eqref{ffewfewfwefewfewf234}\to \eqref{wefwefewfewfewfewfw}$ and therefore a morphism,
$$\ch_{\R,Del}: {\mathbf{ku}}^{\flat}\to\mathbf{H}_{\R,Del}\ .$$
The composition
$${\mathbf{K}}\stackrel{{\tt reg}}{\to} {\mathbf{ku}}^{\flat}\stackrel{\ch_{\R,Del}}{\to} \mathbf{H}_{\R,Del}$$ yields indeed Beilinson's regulator if one applies this to the foliated manifolds $(X^{an},T^{0,1}X)$, precomposes with
${\mathbf{K}}^{alg}(X)\to {\mathbf{K}}(X^{an},T^{0,1}X)$, see Example \ref{kdjqwkldjwqldjwqldwqd}, and uses the identification \eqref{frff877823r87786r8723r32r32r}.
This easily follows from the description of Beilinson's regulator given in \cite{Bunke:2012fk}, \cite{Bunke:2013aa}.
}
\end{rem}

Let $(M,\cF)$ be a foliated manifold.

\begin{lem}
If $\mathrm{codim}(\cF)<p$, then we have a natural isomorphism
$${\mathbf{ku}}\C/\Z^{-p-1}(M)\cong ku^{\flat,-p}(M,\cF)\ .$$
\end{lem} \proof This easily follows from
$$\pi_{p}(\mathbf{DD}^{-}(M,\cF))\cong 0\cong \pi_{p+1}(\mathbf{DD}^{-}(M,\cF))\ .$$ \hB

\begin{kor}
If $\mathrm{codim}(\cF)<p$, then the regulator (Definition \ref{ojlfewfefwefw234234}) induces a map
$$\tilde {\tt reg}:K^{-p}(M,\cF)\to {\mathbf{ku}}\C/\Z^{-p-1}(M)\ .$$
\end{kor}

\begin{rem}\label{2ddhi3dhio32doi2doo2oidjud2}{\rm We have a factorization of ${\tt reg}_{X}$ defined in Definition \ref{klfwefewfewfwf}
as
$$\xymatrix{K_{X}^{-p}(*)\ar[r]^{{\tt reg}_{X}}\ar[d]^{\eqref{dkejwdlewdewded}}&{\mathbf{ku}}\C/\Z^{-p-1}(X)\\
K^{-p}(X,\cF_{max})\ar[ur]^{\tilde {\tt reg}}\ar[r]^{\eqref{cejkwdjejdhewdhewkdei832e32e32e32e2e}}_{\cong}&{\mathbf{K}}(\C)^{-p}(X)\ar[u]^{\sigma}}$$
for every $p\ge 1$. Here $\sigma:{\mathbf{K}}(\C)\to {\mathbf{ku}}\C/\Z$ is the morphism discussed e.g. in \cite[7.21]{karoubiast}, \cite[Ex. 6.9]{2013arXiv1311.3188B}.}
\end{rem}

\begin{rem}{\rm In \cite{Bunke:2014aa} we asked whether the map \begin{equation}\label{hdgqhjdgqwhdghjqwgdwhjgdj7861}
K_{p}(C^{\infty}(X))\to K^{top}_{p}(C^{\infty}(X))\end{equation} can be non-trivial for $p>\dim(X)$.
This question has an analog in the foliated case.

Note that
$$\kappa^{*}{\mathbf{K}}\to \underline{{\mathbf{ku}}}$$ (see Example \ref{kjffjewfewjkfhewkjfewfkewfhewf87z} for $\kappa$) is the homotopification morphism.
The question is now: \begin{prob}\label{lkdeqwdqwdqwd} Let $(M,\cF)$ be a foliated manifold and $p\in \nat$ be such that $\mathrm{codim}(\cF)<p$. Is the map
$$K^{-p}(M,\cF)\to {\mathbf{ku}}^{-p}(M)$$
trivial?
\end{prob}

In the special case of a minimal foliation we ask whether $$K^{-p}(X,\cF_{min})\to {\mathbf{ku}}^{-p}(X)$$
can be non-trivial for $p>\dim(X)$.
The difference to \eqref{hdgqhjdgqwhdghjqwgdwhjgdj7861} can best be explained by the commutative diagram
$$ \xymatrix{K_{p}(C^{\infty}(X))\ar[d]\ar[r]&K_{p}^{top}(C^{\infty}(X))\ar[d] \\K^{-p}(X,\cF_{min})\ar[r]&{\mathbf{ku}}^{-p}(X)}\ ,$$
where the vertical maps are induced by sheafification in the $X$-direction.

\bigskip

In the foliation case we can answer Question \ref{lkdeqwdqwdqwd} affirmatively, at least rationally.
\begin{prop}
Let $(M,\cF)$ be a foliated manifold, $p\in \nat$ such that $\mathrm{codim}(\cF)<p$ and
$x\in K^{-p}(M,\cF)$.
Then the image
$x_{\Q}\in {\mathbf{ku}}\Q^{-p}(M)$ vanishes.\end{prop}\proof Note that the natural map ${\mathbf{ku}}\Q^{*}(M)\to {\mathbf{ku}}\C^{*}(M)$ is injective and that the Bockstein sequence
$${\mathbf{ku}}\C/\Z^{-p-1}(M)\stackrel{\beta}{\to} {\mathbf{ku}}^{-p}(M)\stackrel{c}{\to} {\mathbf{ku}}\C^{-p}(M)$$
is exact. We write $x_{\C}$ for the image of $x_{\Q}$ in ${\mathbf{ku}}\C^{-p}(M)$.
We have
$x_{\C}=c(\beta(\tilde{\tt reg}(x)))=0$. \hB

}\end{rem}
\section{Introduction}

In numerous cases of applied mathematics and mathematical physics the solutions to problems can only be represented as series derived by means of some kind of perturbation theory or iterative procedure. A great majority of such series is even divergent, having meaning only as asymptotic series for an infinitesimally small expansion variable. However, the considered problems often require finite values of this variable, sometimes even very large values. The standard way of treating such asymptotic series, for the purpose of their extrapolation to the finite values of the variable, is by invoking the Pad\'{e} approximants
\cite{Baker1}. The latter, however, exhibit several deficiencies limiting their applicability, as is discussed in Refs. \cite{Baker1,Gluzman2}, for instance, such a notorious deficiency as the appearance of spurious poles.
Another weak point is the ambiguity of choosing one of the Pad\'{e}
approximants $P_{M/N}$ from the table of many admissible, for each series of order $k$, variants satisfying the condition $M + N + 1 = k$. Also, in the limit of a large variable $x$ the approximant $P_{M/N}(x)$ behaves as
$x^{M - N}$. Hence, only integer powers of $x$ are allowed. It is possible to improve the results by employing the modified Pad\'{e} approximants
\cite{Baker_3}, corresponding to the power $P_{M/N}^\gamma$, with choosing the appropriate value of $\gamma$ satisfying the large-variable limit.

In the present paper, we show that it is possible to formulate a general method for effectively extrapolating and interpolating asymptotic series.
The method enjoys the following advantages: (i) It is unambiguously defined for each given series of order $k$; (ii) It allows for the treatment of large-variable behavior of any type, whether with integer, rational, or irrational powers; (iii) Being more general, it is not less accurate than the method of the Pad\'{e} approximants, when the latter exist, in many cases, being more accurate.

In the great majority of realistic situations, only a few terms of asymptotic expansions are available. Therefore, in the examples below, we do not consider very large series, showing that even several terms allow us to derive quite accurate approximations.

\section{Self-similar root approximants}

Suppose we are interested in finding a real function $f(x)$ of a real variable $x$. However, this function is defined by a complicated equation that cannot be solved exactly. But, applying a kind of perturbation theory,
we can derive the small-variable behavior of this function
\begin{equation}
\label{1}
f(x) \simeq f_k(x) \qquad ( x \rightarrow 0 ) \; ,
\end{equation} represented by asymptotic series, with the $k$-th order expansion
\begin{equation}
\label{2}
f_k(x) = f_0(x) \left ( 1 + \sum_{n=1}^k a_n x^n \right ) \; ,
\end{equation} where
\begin{equation}
\label{3}
f_0(x) = A x^\alpha \; .
\end{equation}

Sometimes, the large-variable behavior of the function
\begin{equation}
\label{4}
f(x) \simeq f^{(p)}(x) \qquad ( x \rightarrow \infty )
\end{equation} is also known and can be represented by an expansion over $1/x$ as
\begin{equation}
\label{5}
f^{(p)}(x) = f_\infty(x) \left ( 1 + \sum_{n=1}^p \; \frac{b_n}{x^n}
\right ) \; ,
\end{equation} with
\begin{equation}
\label{6}
f_\infty(x) = B x^\beta \; .
\end{equation}

For what follows, it is convenient to deal with the ratio $f(x)/f_0(x)$,
which at small variable $x \rightarrow 0$ behaves as
\begin{equation}
\label{7}
\frac{f(x)}{f_0(x)} \simeq \frac{f_k(x)}{f_0(x)} = 1 +
\sum_{n=1}^k a_n x^n \; ,
\end{equation} and at large values of the variable $x \rightarrow \infty$ it tends to
\begin{equation}
\label{8}
\frac{f(x)}{f_0(x)} \simeq \frac{f_\infty(x)}{f_0(x)} =
\frac{B}{A} \; x^{\beta-\alpha} \; .
\end{equation}

The extrapolation of the small-variable expansions to the large-variable region can be done by means of self-similar approximation theory
\cite{Yukalov_4,Yukalov_5,Yukalov_6,Yukalov_7,Yukalov_8}. In this approach,
the transfer from a $k$-th order approximation, say, a small-variable expansion, to the higher orders of approximation is treated as the motion with respect to the approximation order $k$ playing the role of discrete time. Constructing a dynamical system, whose trajectory is bijective to the sequence of approximations, makes it feasible to find a fixed point representing the sought function. The convergence to the fixed point is governed by control functions. The self-similar approximation theory combines the methods of optimal control theory, dynamical theory,
and renormalization-group approach. We shall not go into the details and mathematical justification of the self-similar approximation theory that has been thoroughly expounded in Refs.
\cite{Yukalov_4,Yukalov_5,Yukalov_6,Yukalov_7,Yukalov_8}, but we shall use some of its consequences.

Employing this theory for the purpose of interpolation between the small-variable and large-variable regions, it is possible to come
\cite{Yukalov_9,Gluzman_10,Yukalov_11} to the self-similar root approximant
\begin{equation}
\label{9}
\frac{f^*_k(x)}{f_0(x)} = \left ( \left ( \ldots ( 1 + A_1 x )^{n_1}
+ A_2 x^2 \right )^{n_2} + \ldots + A_kx^k \right )^{n_k} \; .
\end{equation} A theorem has been proved \cite{Yukalov_12} stating that all parameters
$A_i$ and powers $n_i$ of approximant (9) are uniquely defined through the large-variable form (5).

However, the root approximant (9) cannot be uniquely defined through the small-variable expansion (2). This hinders the applicability of the approximant (9), since in the majority of cases, the small-variable expansion is better known, providing a number of terms, while the knowledge of the large-variable behavior is limited by just a single term (6), often even without precise data for the amplitude $B$. In order to extend the applicability of approximant (9) to be uniquely defined through the small-variable expansion, it is necessary to impose some constraints on the powers $n_j$. Such a straightforward constraint is the requirement that all parameters $A_j$ of approximant (9) be involved in the definition of the large-variable limit, which implies the relation
\begin{equation}
\label{10}
n_j = \frac{j+1}{j} \qquad ( j = 1,2,\ldots, k-1) \; ,
\end{equation} with $n_k = (\beta - \alpha)/k$, so that approximant (9) reproduces the large-variable power $x^{\beta-\alpha}$ of Eq. (8). By expanding Eq. (9) in powers of $x$, it is easy to prove that all parameters $A_j$ are uniquely defined through the coefficients $a_j$ of small-variable expansion (2). In addition, we can require the validity of the limiting form (6), which improves accuracy.

The self-similar root approximant (9), with conditions (6) and (10),
whose parameters $A_j$ are uniquely defined by the accuracy-through-order procedure and are expressed through the coefficients $a_j$ of the small-variable expansion (2), can be called, for short, the {\it root approximant}. In the following sections, we demonstrate that this root approximant provides quite accurate approximations for different problems,
uniformly extrapolating the small-variable expansion (2), valid for
$x \rightarrow 0$, to the whole region of $x \in [0,\infty]$.

\section{Illustration by simple examples}

Before going to more complicated problems, we show the efficiency of the method by simple cases.

\subsection{Hard-core scattering problem}

Let us start the illustration of the method from the problem considered by Baker and Gammel \cite{Baker_3}. When calculating the scattering length of a repulsive square-well potential, one meets the integral
$$
S(x) = \int_0^x \left ( \frac{\sin t}{t^3} \; - \; \frac{\cos t}{t^2}
\right )^2 \; dt \; ,
$$
whose limit, as $x \rightarrow \infty$, equals $\pi/15$. Baker and Gammel state that this integral cannot be correctly evaluated by the standard Pad\'{e} method.
To solve the problem, they suggest a modified method employing a power of the Pad\'{e} approximant. We show below that such integrals can easily be treated by means of the root approximants.

The small-variable expansion of this integral reads as
$$
S(x) \simeq \frac{x}{9} \; - \; \frac{x^3}{135} + \frac{x^5}{2625} \; - \;
\frac{4x^7}{297675} + \frac{2x^9}{5893965} \; - \; \frac{x^{11}}{166080925} +
\frac{x^{13}}{10672286625} \; .
$$
Comparing this with form (2), we have $S_0(x) = x/9$. Since the expansion of $S(x)/S_0(x)$
is in powers of $x^2$, we construct the root approximants (9) using $x^2$ as a variable. Thus, the root approximant of third order is
$$
S_3^*(x) = \frac{x}{9} \left ( \left ( \left ( 1 + A_1 x^2 \right )^2 +
A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{-1/6}\; ,
$$
where the parameters are
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.016907 \; .
$$
To fourth order,
$$
S_4^*(x) = \frac{x}{9} \left ( \left ( \left ( \left ( 1 + A_1 x^2
\right )^2 + A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{4/3} +
A_4 x^8 \right )^{-1/8}\; ,
$$
where
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.002757 \; ,
\qquad A_4 = 0.004636 \; .
$$
To fifth order,
$$
S_5^*(x) = \frac{x}{9} \left ( \left ( \left ( \left ( \left ( 1 + A_1 x^2
\right )^2 + A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{4/3} +
A_4 x^8 \right )^{5/4} + A_5 x^{10} \right )^{-1/10} \; ,
$$
where
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.002757 \; ,
$$
$$
A_4 = 0.000578 \; , \qquad A_5 = 0.001285 \; .
$$
And to sixth order,
$$
S_6^*(x) = \frac{x}{9} \left ( \left ( \left ( \left ( \left ( \left (
1 + A_1 x^2 \right )^2 + A_2 x^4 \right )^{3/2} + A_3 x^6 \right )^{4/3} +
A_4 x^8 \right )^{5/4} + \right. \right.
$$
$$
\left. \left. +
A_5 x^{10} \right )^{6/5} + A_6 x^{12}
\right )^{-1/12} \; ,
$$
where
$$
A_1 = 0.133333 \; , \qquad A_2 = 0.012952 \; , \qquad A_3 = 0.002757 \; ,
$$
$$
A_4 = 0.000578 \; , \qquad A_5 = 0.000137 \; , \qquad A_6 = 0.000356 \; .
$$
All these approximants converge to $\pi/15$, as $x \rightarrow \infty$. The higher the approximant order, the faster the convergence.

\subsection{Debye function}

The $n$-th order Debye function is defined \cite{Abramowitz_13} through the integral representation
$$
D(n,x) \equiv \frac{n}{x^n} \int_0^x \frac{t^n}{e^t -1 } \; dt \; .
$$
For $|x| < 2 \pi$ and $n \geq 1$, it possesses the expansion
$$
D(n,x) \simeq 1 \; - \; \frac{n}{2(n+1)} \; x \; + \;
n \sum_{k=1}^\infty \frac{B_{2k}}{(2k+n)(2k)!} \; x^{2k} \; ,
$$
in which $B_{2k}$ are Bernoulli numbers. At large $x$ and ${\rm Re}\; n > 0$,
one has
$$
D(n,x) \simeq \frac{C_n}{x^n} \qquad ( x\rightarrow \infty , \; {\rm Re}\; n>0 ) \; ,
$$
where
$$
C_n \equiv n \Gamma(n+1) \zeta(n+1) \; .
$$

Below, we consider the case of $n = 3$, corresponding to the Debye function
$$
D(x) \equiv D(3,x) = \frac{3}{x^3} \int_0^x \frac{t^3}{e^t-1} \; dt \; .
$$
The small-variable expansion for the latter takes the form
$$
D(x) \simeq 1 - \; \frac{3}{8} \; x + \sum_{k=1}^\infty a_{2k} x^{2k}
\qquad ( x \rightarrow 0 ) \; ,
$$
in which
$$
a_{2k} = \frac{3 B_{2k}}{(2k+3)(2k)!} \; .
$$
While the large-variable behavior is given by the expression
$$
D(x) \simeq \frac{C_3}{x^3} \qquad ( x \rightarrow \infty) \; ,
$$
with
$$
C_3 = \frac{\pi^4}{5} = 19.481818 \; .
$$

Constructing the root approximant
$$
D_5^*(x) = \left ( \left ( \left ( \left ( \left ( 1 + A_1 x
\right )^2 + A_2 x^2 \right )^{3/2} + A_3 x^3 \right )^{4/3} +
A_4 x^4 \right )^{5/4} + A_5 x^5 \right )^{-3/5} \; ,
$$
we compare it with the exact numerical values of the function $D(x)$ and find that $D^*_5(x)$ approximates well this function in the whole region of
$x \in [0,\infty]$, with the maximal error of $15 \%$ at $x = 5$. The best two-point Pad\'{e} approximant of the same order, $P_{1/4}(x)$, is less accurate, yielding the maximal error of $33 \%$ at $x = 15$.

\subsection{Fermi-Dirac integral}

The general form of the $j$-th order Fermi-Dirac integral is
$$
F(j,x) = \frac{1}{\Gamma(j+1)} \int_0^\infty \frac{t^j}{e^{t-x}+1} \; dt \; .
$$
Its asymptotic expansions are known \cite{Dingle_14}.

For concreteness, let us consider the zero-order case that reduces to the function
$$
F(x) \equiv F(0,x) = \ln \left ( 1 + e^x \right ) \; .
$$
At small $x$, this function tends to $\ln 2$, and at large $x$, we have
$$
F(x) \simeq x \qquad ( x \rightarrow \infty ) \; .
$$

The root approximant
$$
F_5^*(x) = \ln 2 \left ( \left ( \left ( \left ( \left ( 1 + A_1 x
\right )^2 + A_2 x^2 \right )^{3/2} + A_3 x^3 \right )^{4/3} +
A_4 x^4 \right )^{5/4} + A_5 x^5 \right )^{1/5} \; ,
$$
where
$$
A_1 = 0.721348 \; , \qquad A_2 = 0.360674 \; , \qquad A_3 = 0.390257 \; ,
$$
$$
A_4 = 0.410334 \; , \qquad A_5 = 4.294519 \; ,
$$
provides an accurate approximation for the function $F(x)$ in the whole region of $x \in [0,\infty]$, the maximal error being $5 \%$. The two-point Pad\'{e} approximant $P_{3/2}(x)$ is slightly less accurate, with the maximal error of $6 \%$.

\subsection{Fekete-Szeg\"o problem}

The problem of maximizing the absolute value of a functional in subclasses of normalized functions is called the Fekete-Szeg\"o problem
\cite{Fekete_15,Dziok_16}. The Fekete-Szeg\"o functional is bounded by the function
$$
f(x) = 1 + 2 \exp \left ( - \; \frac{2x}{1-x} \right ) \; ,
$$
where $0 < x < 1$.

In order to consider the interval $[0, \infty]$, as in other examples, we can use the change of the variable
$$
x = \frac{z}{1+z} \; , \qquad z = \frac{x}{1-x} \; .
$$
Then $z \rightarrow \infty$ as $x \rightarrow 1$. Expanding $F(z)$ at small $z$ gives
$$
F(z) \equiv f(x(z)) \simeq 3 - 4z + 4z^2 -\; \frac{8}{3} \; z^3 +
\frac{4}{3} \; z^4 \; - \; \frac{8}{15} \; z^5 \; .
$$
The root approximant $F^*_3(z)$ uniformly approximates the function $F(z)$
on the interval $z \in [0,\infty]$, with the maximal error about $10 \%$.
The two-point Pad\'{e} approximant $P_{2/2}(z)$ is worse, having the maximal error twice larger than the root approximant $F^*_3(z)$.

\section{Some useful tricks}

It is important to mention some tricks allowing for the convenient use of the method. Below, we discuss the interchange of small-variable and large-variable limits and the problem of dealing with logarithms.

\subsection{Inversion of expansions}

In the above examples, we have considered functions, whose expansions are better known for the small-variable limit, while a few, or just a single term, are available in the large-variable limit. But generally, the small-variable and large-variable limits are interchangeable. In those cases, when the large-variable expansion in powers of $1/x$ provides a number of terms and this expansion enjoys better convergence properties,
it is possible to invert the small-variable limit into the large-variable limit by using the variable change $x = 1/t$. Then, instead of the function
$f(x)$, we consider the function
\begin{equation}
\label{11}
F(t) \equiv f \left ( \frac{1}{t} \right ) \; , \qquad t = \frac{1}{x} \; .
\end{equation}

The small-variable limit (1) becomes the large-variable limit
\begin{equation}
\label{12}
F(t) \simeq F^{(k)}(t) \equiv f_k\left ( \frac{1}{t} \right ) \qquad
( t \rightarrow \infty ) \; ,
\end{equation} in which
\begin{equation}
\label{13}
F^{(k)}(t) = F_\infty(t) \left ( 1 +
\sum_{n=1}^k \frac{a_n}{t^n} \right ) \; ,
\end{equation} with
$$
F_\infty(t) \equiv f_0\left ( \frac{1}{t} \right ) = A t^{-\alpha} \; .
$$

Conversely, the large-variable behavior (4) transforms to the small-variable behavior
\begin{equation}
\label{14}
F(t) \simeq F_p(t) \equiv f^{(p)} \left ( \frac{1}{t} \right ) \qquad
( t \rightarrow 0 ) \; ,
\end{equation} in which
\begin{equation}
\label{15}
F_p(t) = F_0(t) \left ( 1 + \sum_{n=1}^p b_n t^n \right ) \; ,
\end{equation} where
$$
F_0(t) \equiv f_\infty \left ( \frac{1}{t} \right ) = B t^{-\beta} \; .
$$

After this change of the variable it is straightforward to employ the same procedure of constructing the root approximants, as is explained in Sec. 2.

More generally, it is possible to use the change of the variable $t = 1/x^s$,
with a positive power $s > 0$, so that again $t \rightarrow \infty$, when $x \rightarrow 0$.

\subsection{Example of inversion}

As an illustration of the inversion procedure, we give below a typical example,
discussing it rather briefly, since the whole method of constructing the root approximants is the same as before.

Let us consider the partition function of the so-called zero-dimensional oscillator, or the generating functional of zero-dimensional $\varphi^4$ field theory, which is defined through the integral
$$
I(x) = \frac{1}{\sqrt{\pi}} \int_{-\infty}^\infty \exp \left ( - \varphi^2
- x \varphi^4 \right ) \; d\varphi \; ,
$$
where $x$ plays the role of a coupling parameter. In the weak-coupling limit,
one has \cite{Yukalov_17} the asymptotic expansion
$$
I(x) \simeq 1 + \sum_{n=1}^\infty a_n x^n \qquad ( x \rightarrow 0 ) \; ,
$$
in which the coefficients are
$$
a_n = \frac{(-1)^n}{\sqrt{\pi}\; n!} \;
\Gamma\left ( 2n + \frac{1}{2} \right ) \; .
$$
For instance
$$
a_1 = -\; \frac{3}{4} \; , \qquad a_2 = \frac{105}{32} \; , \qquad a_3 = - \; \frac{3465}{128} \; ,
$$
and so on.

The strong-coupling expansion reads as
$$
I(x) \simeq 1.022765 \; x^{-1/4} - 0.345684\; x^{-3/4} +
0.127846 \; x^{-5/4} \qquad ( x \rightarrow \infty) \; .
$$
Here the strong-coupling expansion provides a number of terms. Moreover,
the absolute values of the coefficients in this expansion diminish with increasing order, contrary to the coefficients $a_n$ in the weak-coupling expansion, which grow as $n^n$ with increasing order $n$. This makes the strong-coupling expansion more suitable for constructing root approximants.

Resorting to the change of the variable $x = 1/t^4$, we consider the function
$J(t) \equiv I(1/t^4)$ and follow the scheme of the previous section. We define the root approximants $J^*_k(t)$ that give us the approximants
$I^*_k(x) = J^*_k(1/x^{1/4})$ for the sought function. The approximant $I^*_3(x)$ found in this way has the maximal error of $5 \%$ for the whole range of $x \in [0, \infty]$. For comparison, the Pad\'{e} approximant $P_{1/2}(x)$
has the maximal error of about $20 \%$, which is much less accurate.

\subsection{Dealing with logarithms}

It is worth paying attention to the problem of series involving logarithms,
which often appear in physics applications. Such series do not yield any complication for the method of root approximants described here. There are two equivalent ways of treating such series. Thus, if a series contains the terms with $x^n$, $x^{n+1}$, and with $x^n \ln x$, then it is admissible to consider as the terms of one order either those containing
$x^n$ and $x^n \ln x$ or the terms $x^{n+1}$ and $x^n \ln x$.

As an illustration, let us consider a typical series involving logarithms, such as the one arising in the Nambu--Jona-Lasinio model
\cite{Kunihiro_18} and leading to the function
$$
f(x) = x \left [ \sqrt{1 + x^2} \; - \; x^2 \ln\left (
\frac{1+\sqrt{1+x^2}}{x} \right ) \right ] \; ,
$$
where $x$ plays the role of mass. At asymptotically small $x$, it follows
$$
f(x) \simeq x + \left ( \frac{1}{2} - \ln 2 + \ln x \right ) x^3 \qquad
(x \rightarrow 0 ) \; .
$$
While at large $x$, one has
$$
f(x) \simeq \frac{2}{3} - \frac{1}{5x^2} + \frac{3}{28x^4} \qquad
( x \rightarrow \infty ) \; .
$$
Keeping in mind the dependence of the last expansion on $1/x^2$, it is convenient to use the variable $z = 1/x^2$. The root approximant, satisfying the required limits, has the form
$$
f_4^*(x) = \frac{2}{3} \left ( \left ( \left ( 1 + A_1 z \right )^2 +
A_2 z^2 \right )^{3/2} + A_3 z^2 \ln ( 1 + z ) + A_4 z^3 \right )^{-1/6} \; ,
$$
with all parameters uniquely defined by the given expansions. This expression approximates well the initial function $f(x)$, with the maximal error of $2 \%$
at $x \approx 2$. Contrary to this, the best Pad\'{e} approximant of the same order has the error of $11 \%$ at $x \approx 1.5$.

\section{Ground-state energy of electron gas}

Important and not trivial problems arise when studying the properties of charged systems \cite{Loos_19,Cioslowski_20,Cioslowski_21}. Here we show how our method works for the case of homogeneous electron systems.

\subsection{One-dimensional electron gas}

The Hartree-Fock part of the uniform electron energy is well known. The problem arises in calculating the {\it correlation energy}. The latter is usually presented in a reduced dimensionless form $\varepsilon(r_s)$ as a function of the Seitz radius $r_s$. High-density expansion for one-dimensional uniform electron gas \cite{Loos_22} corresponds to small
$r_s$, when for the correlation energy one has
$$
\varepsilon(r_s) \simeq C + 0.00845 r_s \qquad (r_s \rightarrow 0 ) \; ,
$$
where
$$
C = -\; \frac{\pi^2}{360} = - 0.027416 \; .
$$
The low-density expansion \cite{Loos_22} implies large $r_s$, when
$$
\varepsilon(r_s) \simeq \frac{b_1}{r_s} + \frac{b_2}{r_s^{3/2}} \qquad
(r_s \rightarrow \infty) \; ,
$$
where
$$
b_1 = - \left ( \ln \sqrt{2\pi} \; - \; \frac{3}{4} \right ) =
-0.168939 \; , \qquad b_2 = 0.359933 \; .
$$

The root approximant, enjoying the same expansions, but valid for arbitrary
$r_s$ reads as
$$
\varepsilon_3^*(r_s) = -\; \frac{\pi^2}{360}\; \left ( \left ( ( 1+ A_1 r_s)^{3/2}
+ A_2 r_s^2 \right )^{5/4} + A_3 r_s^3 \right )^{-1/3} \; ,
$$
with the parameters
$$
A_1 = 0.493150 \; , \qquad A_2 = 0.056122 \; , \qquad A_3 = 0.004274 \; .
$$
Comparing the prediction of the root approximant with the data from diffusion Monte Carlo calculations \cite{Loos_22} in the interval $0 < r_s < 20$, we find that the maximal error of $\varepsilon^*_3$ is $8\%$. Pad\'{e}
approximants give the errors between $2\%$ and $10\%$. Thus,
$P_{1/2}(\sqrt{r_s})$ has the error of $2\%$, while $P_{0/3}(\sqrt{r_s})$ has the maximal error of $10\%$. The Cioslowski interpolation method
\cite{Cioslowski_23} results \cite{Loos_22} in a better accuracy of $1\%$.
However, this method includes an additional parameter that is fitted from numerical Monte Carlo calculations, whereas our aim has been to construct good approximations without fitting parameters, based only on asymptotic expansions. Avoiding fitting parameters is crucial for those problems where no exact numerical data are available.

\subsection{Two-dimensional electron gas}

Correlation energy of a homogeneous two-dimensional electron gas was studied in several articles, e.g., in Refs.
\cite{Sim_24,Tanatar_25,Kwon_26,Attaccalite_27,Gori_28,Constantin_29,
Drummond_30,Loos_31}. In high-density limit (small $r_s$), the ground-state energy reads \cite{Loos_31} as
$$
E_0(r_s) \simeq \frac{c_{-2}}{r_s^2} + \frac{c_{-1}}{r_s} +
\varepsilon(r_s) \qquad (r_s \rightarrow 0) \; ,
$$
where the first two terms constitute the Hartree-Fock energy, with
$$
c_{-2} = \frac{1}{2} \; , \qquad c_{-1} = -\; \frac{4\sqrt{2}}{3\pi} \; .
$$
And the last term is the correlation energy
$$
\varepsilon(r_s) \simeq c_0 + c_1' r_s \ln r_s \qquad (r_s \rightarrow 0) \; ,
$$
with the coefficients
$$
c_0 = -0.192495 \; , \qquad c_1' = -\sqrt{2} \left ( \frac{10}{3\pi}
- 1 \right ) = - 0.0863136 \; .
$$

In the low-density limit (large $r_s$) the asymptotic expansion for the correlation energy can be written \cite{Kwon_26} as
$$
\varepsilon(r_s) \simeq \frac{b_1}{r_s} + \frac{b_2}{r_s^{3/2}} +
\frac{b_3}{r_s^2} \qquad (r_s \rightarrow \infty) \; ,
$$
where
$$
b_1 = -0.472189 \; , \qquad b_2 = 0.4964 \; , \qquad b_3 = 0.5297 \; .
$$

For intermediate $r_s$, there have been suggested
\cite{Attaccalite_27,Gori_28,Drummond_30} several phenomenological expressions with parameters fitted from Monte Carlo calculations. Thus,
Gori-Giorgi et al. \cite{Gori_28} suggested the form
$$
\varepsilon(r_s) = A_0 + \left ( B_0 r_s + C_0 r_s^2 + D_0 r_s^3
\right ) \; \ln \left ( 1 +
\frac{1}{E_0 r_s + F_0 r_s^{3/2}+G_0 r_s^2 +H_0 r_s^3}
\right ) \; ,
$$
with the parameters
$$
A_0 = - 0.1925 \; , \qquad B_0 = 0.0863136 \; , \qquad C_0 = 0.057234 \; , \qquad D_0 = 0.003362896 \; ,
$$
$$
E_0 = 1.0022 \; , \qquad F_0 = -0.02069 \; , \qquad G_0 = 0.34 \; , \qquad H_0 = 0.01747 \; .
$$
This expression can be used as a numerical result for estimating the accuracy of approximate analytic formulas.

The root approximant, satisfying all asymptotic expansions reads as
$$
\varepsilon_5^*(r_s) = \frac{b_1}{r_s} \left ( \left ( \left ( 1 +
\frac{A_1}{\sqrt{r_s}} \right )^2 + \frac{A_2}{r_s} \right )^{3/2}
+ \frac{A_3}{r_s}\; \ln \left ( 1 + \frac{1}{\sqrt{r_s}} \right )
+ \frac{A_4}{r_s^{3/2}} + \frac{A_5}{r_s^2} \right )^{-1/2} \; ,
$$
where the parameters are
$$
b_1 = - 0.472189 \; , \qquad A_1 = 0.700849 \; , \qquad A_2 = 2.723702 \; ,
$$
$$
A_3 = 10.792193 \; , \qquad A_4 = -5.764339 \; , \qquad A_5 = 6.017150 \; .
$$
The error of this approximant is about $5\%$.

\section{Systems with spherical symmetry}

Finite quantum systems often enjoy spherical symmetry. Below, we consider two examples of such systems that are important for applications.

\subsection{Energy of harmonium atoms}

An $N$-electron harmonium atom is described by the Hamiltonian
$$
\hat H =
\frac{1}{2} \sum_{i=1}^N \left ( - \nabla_i^2 + \omega^2 r_i^2 \right ) +
\frac{1}{2} \sum_{i\neq j}^N \frac{1}{r_{ij}} \; ,
$$
where dimensionless units are employed and
$$
r_i \equiv | {\bf r}_{i} | \; , \qquad r_{ij} \equiv | {\bf r}_i - {\bf r}_j | \; .
$$
This Hamiltonian provides a rather realistic modeling of trapped ions,
quantum dots, and some other finite systems, such as atomic nuclei and metallic grains \cite{Birman_32}. This is why the energy of harmonium atoms has been intensively studied
\cite{Cioslowski_33,Cioslowski_34,Cioslowski_35,Cioslowski_36,Cioslowski_37}.
Here we show that root approximants give a good approximation for the energy of such systems. We consider the ground-state energy of a two-electron harmonium.

At a shallow harmonic potential, the energy can be expanded
\cite{Cioslowski_23} in powers of $\omega$, so that
$$
E(\omega) \simeq E_k(\omega) \qquad (\omega\rightarrow 0) \; ,
$$
with the truncated series
$$
E_k(\omega) = \sum_{n=0}^k c_n \omega^{(2+n)/3} \; .
$$
For instance, to third order, we get
$$
E_3(\omega) = c_0 \omega^{2/3} + c_1 \omega + c_2 \omega^{4/3} \; ,
$$
with the coefficients
$$
c_0 = \frac{3}{2^{4/3}} = 1.19055 \; , \qquad c_1 = \frac{1}{2} \; \left ( 3 + \sqrt{3} \right ) = 2.36603 \; ,
\qquad c_2 = \frac{7}{36}\; 2^{-2/3} = 0.122492 \; .
$$

And for a rigid potential, the energy is approximated \cite{Cioslowski_23}
as
$$
E(\omega) \simeq E^{(p)}(\omega) \qquad (\omega\rightarrow\infty) \; ,
$$
where
$$
E^{(p)}(\omega) = \sum_{n=0}^p b_n \omega^{(2-n)/2} \; .
$$
To fourth order, one has
$$
E^{(4)}(\omega) = b_0 \omega + b_1 \omega^{1/2} + b_2 + b_3 \omega^{-1/2} \; ,
$$
where
$$
b_0 = 3 \; , \qquad b_1 = \sqrt{\frac{2}{\pi} } = 0.797885 \; , \qquad b_2 = -\;\frac{2}{\pi}\; \left ( 1 - \; \frac{\pi}{2} +
\ln 2 \right ) = - 0.077891 \; ,
$$
$$
b_3 = \left ( \frac{2}{\pi}\right )^{3/2}\; \left [ 2 - 2G - \;
\frac{3}{2}\; \pi + ( \pi + 3) \ln 2 + \frac{3}{2}\; ( \ln 2)^2 - \;
\frac{\pi^2}{24} \right ] = 0.0112528 \; ,
$$
with the Catalan constant
$$
G \equiv \sum_{n=0}^\infty \frac{(-1)^n}{(2n+1)^2} =
0.91596559 \; .
$$

The root approximant, respecting all given small-$\omega$, as well as large-$\omega$ expansions, is
$$
E_6^*(\omega) = c_0 \omega^{2/3} \left ( \left ( \left ( \left ( \left (
\left ( 1 + A_1 \omega^{1/3} \right )^{1/2} + A_2 \omega^{2/3} \right )^{3/4}
+ A_3 \omega \right )^{5/6} + A_4 \omega^{4/3} \right )^{7/8} + \right. \right.
$$
$$
\left. \left. +
A_5 \omega^{5/3} \right )^{9/10} + A_6 \omega^2 \right )^{1/6} \; ,
$$
with the parameters
$$
c_0 = 1.19055 \; , \qquad A_1 = 48.4532 \; , \qquad A_2 = 564.108 \; ,
$$
$$
A_3 = 1088.39 \; , \qquad A_4 = 1221.08 \; , \qquad A_5 = 796.791 \; , \qquad A_6 = 256 \; .
$$
We estimate the accuracy of the root approximant comparing it with the numerical data from Ref. \cite{Matito_38} and find that its maximal error is only $0.9 \%$. Note that Pad\'{e} approximants cannot be used in the case of harmonium, since the small-variable and large-variable asymptotic expansions are incompatible.

\subsection{Energy of two-electron spherium}

The two-electron spherium is a system consisting of two electrons that are confined to the surface of a sphere of radius $R$. The ground-state energy of the system \cite{Cioslowski_23,Loos_39} possesses the small-radius expansion
$$
E(R) \simeq \frac{1}{R} + c_0 + c_1 R + c_2 R^2 + c_3 R^3 \qquad
(R \rightarrow 0) \; ,
$$
in which
$$
c_0 = 4\ln 2 - 3 = - 0.22741128 \; , \qquad c_1 = 8(\ln 2)^2 - 40 \ln 2 + 24 = 0.11773689 \; ,
$$
$$
c_2 = -0.05027560 \; , \qquad c_3 = 0.01395783 \; .
$$
The coefficients $c_2$ and $c_3$ can also be expressed in closed forms that,
however, are too cumbersome \cite{Loos_39}, which is why we give here only their numerical values.

In the large-radius limit, the energy has the expansion
$$
E(R) \simeq \frac{1}{2R} + \frac{1}{2R^{3/2}} \; - \; \frac{1}{8R^2} \;
- \; \frac{1}{128 R^{5/2}} \qquad (R \rightarrow \infty ) \; .
$$

The root approximant can be written in the form
$$
E_5^*(R) = \frac{1}{R} + c_0 \left ( \left ( \left (
\left ( ( 1 + A_1 R)^{3/2} + A_2 R^2 \right )^{5/4} + A_3 R^3 \right )^{7/6}
+ A_4 R^4 \right )^{9/8} + A_5 R^5 \right )^{-1/5} \; ,
$$
where
$$
A_1 = 1.05188915 \; , \qquad A_2 = 0.56453530 \; , \qquad A_3 = 0.36000617 \; ,
$$
$$
A_4 = 0.12606787 \; , \qquad A_5 = 0.01946301 \; .
$$
Comparing this expression with numerical data \cite{Loos_39}, we find that the maximal error occurs at $R = 20$, being only $0.1 \%$. The best Pad\'{e}
approximant $P_{5/5}(\sqrt{R})$ is much less accurate, having the maximal error, also at $R = 20$, but an order larger, $1.5 \%$.

\section{Discussion}

We have described a simple and general method for interpolating functions between their small-variable and large-variable asymptotic expansions. The method is based on the construction of self-similar root approximants enjoying the general form
$$
f^*_k(x) = f_0(x) \left ( \left ( \left ( \ldots ( 1 + A_1 x )^{n_1} +
A_2 x^2 \right )^{n_2} + A_3 x^3 \right )^{n_3} + \ldots +
A_k x^k \right )^{n_k} \; .
$$
All parameters $A_i$ can be uniquely defined through the corresponding asymptotic expansions. By changing the variable, it is easy to invert the expansions between the small-variable and large-variable limits.

Our aim has been to suggest a method that would involve no fitting parameters. This is especially important in those complicated cases, where numerical data in the whole region of the variable are not available. The absence of fitting parameters makes our approach different from other interpolation methods, such as the Cioslowski method \cite{Cioslowski_23}.

We have demonstrated the method of root approximants by several examples,
whose structure is typical for many applications, including the hard-core scattering problem, Debye function, Fermi-Dirac integral, Fekete-Szeg\"{o}
problem, zero-dimensional oscillator, homogeneous electron gas, harmonium atom, and spherium.

We have analyzed several more problems, e.g., the interpolation of the polaron mass between weak-coupling and strong-coupling limits studied earlier by the Feynman variational procedure \cite{Feynman_39} and by other methods
\cite{Feranchuk_40,Alexandrou_41,Kleinert_42,Kornilovitch_43}. Our approach provides approximations whose accuracy is comparable to or better than that of other methods, being at the same time simpler.

Generally, the suggested method provides the accuracy not worse than the method of Pad\'{e} approximants and in the majority of cases is more accurate than the latter.

Besides the root approximants of the general form (9), we have also considered
{\it additive approximants} represented by the sums
$$
f^*_{M/N}(x) = \sum_{i=1}^{(M+N)/2} A_i ( 1 + B_i x)^{n_i} \; .
$$
This type of expressions can be considered either as additive root approximants or an additive variant resulting from self-similar factor approximants
\cite{Gluzman_44}.

For example, in the case of one-dimensional electron gas, the correlation energy is approximated as
$$
\varepsilon^*_{2/2}(r_s) = A_1 ( 1 + B_1 r_s )^{-1} +
A_2 ( 1 + B_2 r_s )^{-3/2} \; ,
$$
with the parameters
$$
A_1 = -0.044941 \; , \qquad A_2 = 0.017526 \; , \qquad B_1 = 0.266023 \; , \qquad B_2 = 0.133344 \; .
$$
This expression has the maximal error of $11\%$. However a more detailed analysis of such additive approximants requires a separate investigation,
which is out of the scope of the present paper.

\vskip 2cm

\acknowledgments{Acknowledgments}

One of the authors (V.I.Y.) acknowledges financial support from the Russian Foundation for Basic Research (grant 14-02-00723) and is grateful for useful discussions to E.P. Yukalova.

\conflictofinterests{Conflicts of Interest}

The authors declare no conflict of interest.

\newpage

\end{document}
\title{
Projection method and new formulation of leading-order anisotropic hydrodynamics
}

\begin{abstract}
The projection method introduced earlier for boost-invariant and cylindrically symmetric systems is used to introduce a new formulation of anisotropic hydrodynamics that allows for three substantially different values of pressure acting locally in three different directions. Our considerations are based on the Boltzmann kinetic equation with the collision term treated in the relaxation time approximation and the momentum anisotropy is included explicitly in the leading term of the distribution function. A novel feature of our work is the complete analysis of the second moment of the Boltzmann equation, in addition to the zeroth and first moments that have been analyzed in earlier studies. We define the final equations of anisotropic hydrodynamics in the leading order as a subset of the analyzed moment equations (and their linear combinations) which agree with the Israel-Stewart theory in the case of small pressure anisotropies.
\end{abstract}

\section{Introduction}
\label{sect:intro}

Successful applications of relativistic viscous hydrodynamics in the description of heavy-ion collisions at RHIC (Relativistic Heavy-Ion Collider) and the LHC (Large Hadron Collider) triggered large interest in the development of the hydrodynamic framework
\cite{Israel:1976tn,Israel:1979wp,
Muronga:2001zk,Muronga:2003ta,
Baier:2006um,Baier:2007ix,
Romatschke:2007mq,Dusling:2007gi,Luzum:2008cw,
Song:2008hj,El:2009vj,PeraltaRamos:2010je,
Denicol:2010tr,Denicol:2010xn,
Schenke:2010rr,Schenke:2011tv,
Bozek:2009dw,Bozek:2011wa,
Niemi:2011ix,Niemi:2012ry,
Bozek:2012qs,Denicol:2012cn,Jaiswal:2013npa}. An example of the new approach to relativistic dissipative hydrodynamics is {\it anisotropic hydrodynamics} \cite{Florkowski:2010cf,Martinez:2010sc,
Ryblewski:2010bs,Martinez:2010sd,
Ryblewski:2011aq,Martinez:2012tu,
Ryblewski:2012rr,Ryblewski:2013jsa,
Florkowski:2012ax,Florkowski:2012as} --- the framework where effects connected with the expected high pressure anisotropy of the produced matter are included in the leading order of the hydrodynamic expansion. Very recently, also the second order anisotropic hydrodynamics has been formulated by Bazow, Heinz, and Strickland~\cite{Bazow:2013ifa}. The~new approach introduced in \cite{Bazow:2013ifa} allows for description of arbitrary transverse expansion of matter in the way which becomes consistent with more traditional approaches to dissipative hydrodynamics in the small anisotropy limit. This formalism uses, however, the Romatschke-Strickland form \cite{Romatschke:2003ms} of the distribution function in the leading order, which implies that the two components of pressure in the transverse plane may be different only if the second-order corrections are taken into account.

In this work we present a new methodology for including three substantially different pressure components already in the leading order of hydrodynamic expansion. Our approach is based on the projection method introduced in Ref.~\cite{Florkowski:2011jg}, which has turned out to be a convenient tool to replace complicated tensor equations of relativistic hydrodynamics by a small set of scalar equations. We take into account the radial expansion of the produced matter (in addition to the longitudinal Bjorken flow) but our considerations are confined to the case with cylindrical symmetry. We generalize the Romatschke-Strickland form to the case where all three pressure components may be different. Compared to earlier works on anisotropic hydrodynamics in the leading order, where the zeroth and first moments of the Boltzmann equation have been studied, an important novel feature of our present work is the analysis of the second moment of the Boltzmann equation. We argue that a successful agreement with the Israel-Stewart theory in the limit of small anisotropies may be achieved if we take into account two equations constructed from the second moment of the Boltzmann equation rather than taking one equation from the zeroth moment and another equation from the second moment.

In our opinion, the use of the second moment sheds new light on the framework of anisotropic hydrodynamics. We expect that the formalism developed in this paper may be a better starting point for the second-order anisotropic hydrodynamics developed according to the guidelines presented in Ref.~\cite{Bazow:2013ifa}. In addition, the presented approach may be generalized in the natural way to the 2+1 case where the cylindrical symmetry is relaxed.

The paper is organized as follows: In the next Section we introduce the four-vectors $U$, $X$, $Y$, and $Z$ used to decompose different tensors used in our formalism, in particular, to decompose the expansion and shear tensors. In Sec.~\ref{sect:BE} we discuss the Boltzmann equation in the relaxation time approximation and introduce the anisotropic distribution function characterized by three anisotropy parameters. The zeroth moment of the Boltzmann equation is discussed shortly in Sec.~\ref{sect:0mom}. In Sec.~\ref{sect:1mom} we characterize the energy-momentum conservation law, the Landau matching condition, and the close-to-equilibrium limit of the energy-momentum tensor. The formulas for the energy-density and pressure of anisotropic systems are presented in Sec.~\ref{sect:enedenaniso}. Sec.~\ref{sect:2mom} contains the analysis of the second moment of the Boltzmann equation. The most important part of the paper, Sec.~\ref{sect:set}, describes the construction of two equations (out of the complete set of second moment equations) which are finally accepted as the two new equations of anisotropic hydrodynamics in the leading order. The entropy production and its positivity is discussed in Sec.~\ref{sect:ent}. We summarize and conclude in Sec.~\ref{sect:con}. Two appendices containing explicit forms of different expressions and integrals close the paper. Throughout the paper we use natural units where $c=\hbar=k_B=1$ and the metric tensor with the signature $(+,-,-,-)$.

\section{Projection method for boost-invariant and cylindrically symmetric hydrodynamic systems}
\label{sect:projection}

\subsection{Boost-invariant and cylindrically symmetric flow}
\label{sect:flow}

The space-time coordinates and the four-vector describing the hydrodynamic flow are denoted in the standard way as
$x^\mu = \left( t, x, y, z \right)$ and
\begin{equation}
U^\mu = \gamma (1, v_x, v_y, v_z), \quad \gamma = (1-v^2)^{-1/2}.
\label{Umu0}
\end{equation}
For boost-invariant and cylindrically symmetric systems, the scalar quantities may depend only on the (longitudinal) proper time and the radial distance
\begin{equation}
\tau = \sqrt{t^2 - z^2}, \quad r = \sqrt{x^2 + y^2}.
\label{taur}
\end{equation}
In addition, for the boost-invariant hydrodynamic flow (\ref{Umu0}) we may use the following parametrization
\begin{eqnarray}
U^0 = \cosh \theta_\perp \cosh \eta_\parallel, \quad U^1 = \sinh \theta_\perp \cos \phi,
\quad U^2 = \sinh \theta_\perp \sin \phi, \quad U^3 = \cosh \theta_\perp \sinh \eta_\parallel,
\label{Umu}
\end{eqnarray}
where $\theta_\perp=\theta_\perp(\tau,r)$ is the transverse fluid rapidity defined by the formula
\begin{equation}
v_\perp = \sqrt{v_x^2+v_y^2} = \frac{\tanh \theta_\perp}{\cosh\eta_\parallel}.
\label{thetaperp}
\end{equation}
Here $\eta_\parallel$ is the space-time rapidity and $\phi$ is the azimuthal angle
\begin{eqnarray}
\eta_\parallel = \frac{1}{2} \ln \frac{t+z}{t-z},
\quad \phi = \arctan \frac{y}{x}.
\label{etaparphi}
\end{eqnarray}

In addition to $U^\mu$ we define three other four-vectors. The first one, $Z^\mu$, defines the longitudinal direction that plays a special role due to the initial geometry of the collision,
\begin{eqnarray}
Z^0 = \sinh \eta_\parallel, \quad Z^1 = 0, \quad Z^2 = 0, \quad Z^3 = \cosh \eta_\parallel.
\label{Zmu}
\end{eqnarray}
The second four-vector, $X^\mu$, defines a transverse direction to the beam,
\begin{eqnarray}
X^0 = \sinh \theta_\perp \cosh \eta_\parallel, \quad X^1 = \cosh \theta_\perp \cos \phi, \quad X^2 = \cosh \theta_\perp \sin \phi, \quad X^3 = \sinh \theta_\perp \sinh \eta_\parallel,
\label{Xmu}
\end{eqnarray}
while the third four-vector, $Y^\mu$, defines the second transverse direction,
\begin{eqnarray}
Y^0 = 0, \quad Y^1 = -\sin \phi, \quad Y^2 = \cos \phi, \quad Y^3 = 0.
\label{Ymu}
\end{eqnarray}

The four-vector $U^\mu$ is time-like, while the four-vectors $Z^\mu, X^\mu, Y^\mu$ are space-like. In addition, they are all orthogonal to each other,
\begin{eqnarray}
U^2 &=& 1, \quad Z^2 = X^2 = Y^2 = -1, \nonumber \\
U \cdot Z &=& 0, \quad U \cdot X = 0, \quad U \cdot Y = 0, \nonumber \\
Z \cdot X &=& 0, \quad Z \cdot Y = 0, \quad X \cdot Y = 0.
\label{norm}
\end{eqnarray}
All these properties are most easily seen in the {\it local rest frame} of the fluid element (LRF), where we have \mbox{$\theta_\perp = \eta_\parallel = \phi = 0$} and
\begin{eqnarray}
U = (1,0,0,0), \quad Z = (0,0,0,1), \quad X = (0,1,0,0), \quad Y = (0,0,1,0).
\label{LRF}
\end{eqnarray}

In the standard formalism of dissipative hydrodynamics one uses the operator $ \Delta^{\mu \nu} = g^{\mu \nu} - U^\mu U^\nu$, that projects on the three-dimensional space orthogonal to $U^\mu$. It can be shown that
\begin{equation}
\Delta^{\mu \nu} = g^{\mu \nu} - U^\mu U^\nu = -X^\mu X^\nu - Y^\mu Y^\nu - Z^\mu Z^\nu.
\label{Delta}
\end{equation}
Using Eqs. (\ref{norm}) we find that $Z^\mu, X^\mu$ and $Y^\mu$ are the eigenvectors of $\Delta^{\mu \nu}$,
\begin{equation}
\Delta^{\mu}_{\,\, \nu} \,X^\nu = X^\mu, \quad \Delta^{\mu}_{\,\, \nu} \,Y^\nu = Y^\mu, \quad
\Delta^{\mu}_{\,\, \nu} \,Z^\nu = Z^\mu.
\label{eigen}
\end{equation}
In this work, following the method of Ref.~\cite{Florkowski:2011jg}, we use the tensor products of the four-vectors $U, X, Y$, and $Z$ as the basis to decompose all other tensors appearing in the formalism of standard dissipative hydrodynamics and anisotropic hydrodynamics. This allows us to replace complicated tensor equations by a set of scalar equations and to identify the key degrees of freedom in anisotropic hydrodynamics. Various formulas and identities satisfied by the four-vectors $U, X, Y$, and $Z$, and also by their derivatives are listed in Sec.~\ref{sect:explicitr}. We shall refer frequently to those expressions in this paper.

\subsection{Expansion and shear tensors}
\label{sect:expandshear}

For the sake of convenience, we present now explicit forms of the expansion and shear tensors expressing them in terms of $X$, $Y$ and $Z$. In the general case, the expansion tensor is defined by the formula~\cite{Muronga:2003ta}
\begin{equation}
\theta_{\mu \nu} = \Delta^\alpha_\mu \Delta^\beta_\nu \partial_{(\beta} U_{\alpha)},
\label{theta-munu}
\end{equation}
where the brackets denote the symmetric part of $\partial_{\beta} U_{\alpha}$. Using Eqs. (\ref{Umu}) in the definition of the expansion tensor (\ref{theta-munu}) and also using Eqs. (\ref{Zmu})--(\ref{Ymu}), we find that the following decomposition holds for boost-invariant and cylindrically symmetric systems \cite{Florkowski:2011jg}~\footnote{We stress that the subscripts $X$, $Y$, and $Z$ do not denote the Cartesian coordinates but refer typically to the coefficients in the decompositions such as Eq.~(\ref{theta-dec}).}
\begin{equation}
\theta^{\mu \nu} = \theta_X X^\mu X^\nu + \theta_Y Y^\mu Y^\nu + \theta_Z Z^\mu Z^\nu,
\label{theta-dec}
\end{equation}
where
\begin{equation}
\theta_X = - \frac{\partial \theta_\perp}{\partial r} \cosh \theta_\perp
- \frac{\partial \theta_\perp}{\partial \tau} \sinh \theta_\perp, \quad
\theta_Y = - \frac{\sinh \theta_\perp}{r}, \quad
\theta_Z = - \frac{\cosh \theta_\perp}{\tau}.
\label{thetas}
\end{equation}
The contraction of the tensors $\Delta^{\mu \nu}$ and $\theta^{\mu \nu}$ gives the volume expansion parameter $\theta = \Delta^{\mu \nu} \theta_{\mu \nu}$. Equations~(\ref{Delta})--(\ref{theta-munu}) yield
\begin{eqnarray}
\theta = -\theta_X - \theta_Y - \theta_Z.
\label{volexpp}
\end{eqnarray}
It is interesting to check that the volume expansion parameter $\theta$ may be expressed also by the formula $\theta = \partial_\mu U^\mu$.

In addition to the expansion tensor $\theta^{\mu\nu}$ we shall use the shear tensor $\sigma_{\mu \nu}$. The latter is defined by the formula
\begin{equation}
\sigma_{\mu \nu} = \theta_{\mu \nu} - \frac{1}{3} \Delta_{\mu \nu} \theta.
\label{sigma1}
\end{equation}
With the help of the decompositions (\ref{Delta}) and (\ref{theta-dec}) we may write
\begin{equation}
\sigma^{\mu \nu} = \sigma_X X^\mu X^\nu + \sigma_Y Y^\mu Y^\nu + \sigma_Z Z^\mu Z^\nu,
\label{sigma-dec}
\end{equation}
where
\begin{eqnarray}
\sigma_X &=& \frac{\theta}{3}+\theta_X = \frac{\cosh \theta_\perp}{3 \tau} +
\frac{\sinh\theta_\perp}{3r}
-\frac{2}{3} \frac{\partial\theta_\perp}{\partial \tau} \sinh\theta_\perp
-\frac{2}{3} \frac{\partial\theta_\perp}{\partial r} \cosh\theta_\perp , \label{sigmaX}
\end{eqnarray}
\begin{eqnarray}
\sigma_Y &=& \frac{\theta}{3}+\theta_Y = \frac{\cosh \theta_\perp}{3 \tau} -
\frac{2 \sinh\theta_\perp}{3r}
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial \tau} \sinh\theta_\perp
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial r} \cosh\theta_\perp , \label{sigmaY}
\end{eqnarray}
and
\begin{eqnarray}
\sigma_Z &=& \frac{\theta}{3}+\theta_Z
= -\frac{2\cosh \theta_\perp}{3 \tau} +
\frac{\sinh\theta_\perp}{3r}
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial \tau} \sinh\theta_\perp
+\frac{1}{3} \frac{\partial\theta_\perp}{\partial r} \cosh\theta_\perp . \label{sigmaZ}
\end{eqnarray}
In agreement with general requirements we find that
\begin{eqnarray}
\sigma_X+\sigma_Y+\sigma_Z=0.
\label{sumsigma}
\end{eqnarray}
In the case where the radial flow is absent \mbox{$\sigma_X = \sigma_Y = 1/(3 \tau)$} and \mbox{$\sigma_Z = -2/(3 \tau)$}, which agrees with earlier findings \cite{Muronga:2003ta}. For brevity of notation, expressions such as Eqs.~(\ref{theta-dec}) or (\ref{sigma-dec}) will be written shortly as the sums
\begin{eqnarray}
\theta^{\mu\nu} = \sum_I \theta_I I^\mu I^\nu,
\qquad
\sigma^{\mu\nu} = \sum_I \sigma_I I^\mu I^\nu,\end{eqnarray}
where $I$ takes the values $X$, $Y$, and $Z$. Similarly, Eqs.~(\ref{volexpp}) and (\ref{sumsigma}) may be written as
\begin{eqnarray}
\theta = -\sum_I \theta_I, \qquad
\sum_I \sigma_I = 0.
\end{eqnarray}

\section{Boltzmann equation and anisotropic distribution functions}
\label{sect:BE}

The basis for our considerations is the Boltzmann equation treated in the relaxation time approximation
\cite{Bhatnagar:1954zz,Baym:1984np,Baym:1985tna,
Heiselberg:1995sh,Wong:1996va}
\begin{equation}
p\cdot\partial f = \frac{p\cdot U}{\tau_{\rm eq}}
\left(f_{\rm eq} - f \right).
\label{RTA}
\end{equation}
In Eq.~(\ref{RTA}) $f$ is the phase-space distribution function, $f_{\rm eq}$ is the equilibrium distribution function, and $\tau_{\rm eq}$ is the relaxation time. In different frameworks of anisotropic hydrodynamics which have been studied so far, one assumes that the distribution function $f$ is very well approximated by the Romatschke-Strickland form \cite{Romatschke:2003ms}. The use of this form is, however, not satisfactory in the cases where the transverse expansion is included in addition to the longitudinal Bjorken flow. If the effects connected with shear viscosity are taken into account, the presence of the transverse flow induces differences between the two components of pressure in the transverse plane. The Romatschke-Strickland form allows for the difference between the longitudinal and transverse pressures but the two transverse pressures must be identical.

An essential new feature of the present work is the generalization of the Romatschke-Strickland form to the expression which allows for three different components of pressure
\begin{eqnarray}
f(x,p) = k \exp\left(-\frac{1}{\Lambda}\sqrt{ \left( 1 + \zeta_X \right) \left( p\cdot X \right)^2 + \left( 1 + \zeta_Y \right) \left( p\cdot Y \right)^2 + \left( 1 + \zeta_Z \right) \left( p\cdot Z \right)^2 }\right).
\label{fzeta}
\end{eqnarray}
Here $k$ is an overall normalization constant, $\Lambda$ is the typical momentum scale, and $\zeta_I$'s $(I=X,Y,Z)$ are three anisotropy parameters. In the special case where $\zeta_X=\zeta_Y=0$, Eq.~(\ref{fzeta}) is reduced to the Romatschke-Strickland form \cite{Romatschke:2003ms}. In more general cases, the function (\ref{fzeta}) depends on the three different ratios $(1+\zeta_I)/\Lambda^2$. Exactly this feature allows us to introduce three different components of pressure in the local rest frame.

Introducing the new variables, namely,
\begin{eqnarray}
\lambda = \frac{ \Lambda }{ \sqrt{ 1 +
\frac{ 1 }{ 3 } (\zeta_X + \zeta_Y + \zeta_Z)} }, \quad \xi_I = \frac{1 +\zeta_I}{ 1 + \frac{1}{ 3 } ( \zeta_X + \zeta_Y + \zeta_Z )} - 1
\quad (I=X,Y,Z),
\end{eqnarray}
the distribution function (\ref{fzeta}) may be rewritten in the equivalent form as
\begin{eqnarray}
f(x,p) &=& k \exp\left(-\frac{1}{\lambda}\sqrt{ \left( 1 + \xi_X \right) \left( p\cdot X \right)^2 + \left( 1 + \xi_Y \right) \left( p\cdot Y \right)^2 + \left( 1 + \xi_Z \right) \left( p\cdot Z \right)^2 }\,\,\right) \nonumber \\
&=& k \exp\left(-\frac{1}{\lambda}\sqrt{
\left( p\cdot U\right)^2 +
\xi_X \left( p\cdot X \right)^2 +
\xi_Y \left( p\cdot Y \right)^2 +
\xi_Z \left( p\cdot Z \right)^2 }\,\,\right),
\label{fxi}
\end{eqnarray}
where the new anisotropy parameters $\xi_I$ satisfy the condition
\begin{eqnarray}
\sum_I \xi_I = \xi_X + \xi_Y + \xi_Z = 0.
\label{sumofxis}
\end{eqnarray}
To replace the first line in Eq.~(\ref{fxi}) by the second line we used Eq.~(\ref{Delta}) and the mass-shell condition $p^2=m^2=0$. The physical constraints $\Lambda > 0$ and $1+\zeta_I > 0$ imply that $\lambda > 0$ and $1+\xi_I > 0$. Hence, the initial parametrization (\ref{fzeta}) is completely equivalent to the new one. Below we shall use the expression (\ref{fxi}) and treat the scale $\lambda$ together with the two anisotropy parameters $\xi_X$ and $\xi_Y$ as three independent variables~\footnote{We note that the covariant form of the distribution function depends also on the transverse fluid rapidity $\theta_\perp$ through the vectors $U^\mu$ and $X^\mu$. Hence, we have in fact four independent scalar functions in (\ref{fxi}).}. Together with the positivity conditions $1+\xi_I > 0$, Eq.~(\ref{sumofxis}) defines the applicability range of our parameterization
\begin{eqnarray}
-1 < \xi_X, \quad -1 < \xi_Y, \quad \xi_X+\xi_Y < 1.
\label{range}
\end{eqnarray}
The equilibrium function in (\ref{RTA}) has the form
\begin{eqnarray}
f_{\rm eq}(x,p) = k \exp\left( - \frac{p \cdot U}{T}
\right).
\label{feq}
\end{eqnarray}
One can show that the distribution function (\ref{fxi}) is reduced to the form (\ref{feq}) with $\lambda=T$, if the anisotropy parameters $\xi_I$ are all set equal to zero.

\section{Zeroth moment and particle number density}
\label{sect:0mom}

In this Section we present the zeroth moment of the Boltzmann equation. The zeroth and the first moments of the Boltzmann equation were used in Refs.~\cite{Martinez:2010sc,Martinez:2010sd,
Martinez:2012tu} to derive equations of anisotropic hydrodynamics in the direct relation to kinetic theory. This approach is suitable for the analysis of one-dimensional boost-invariant flow, since the first two moments yield three equations for three unknown functions (in this work these functions have been introduced as $\Lambda$, $T$, and $\zeta_Z$). If the transverse flow is included, one has to take into consideration one or more equations from the second moment of the Boltzmann equation. Below, we shall argue that in the boost-invariant and cylindrically symmetric case (with non-zero radial flow) it is preferable to consider two equations from the second moment rather than one equation from the zeroth moment together with an extra equation obtained from the second moment. Consequently, the formulas introduced in this Section will serve only as the reference point.

Having in mind the comments stated above, we introduce the zeroth moment of the kinetic equation (\ref{RTA})
\begin{eqnarray}
\int\!\! dP \; p\cdot\partial f = \frac{1}{\tau_{\rm eq}} \int\!\! dP \, p\cdot U
\left( f_{\rm eq} -f \right).
\label{zm1}
\end{eqnarray}
Here $dP=d^3{\bf p}/p$ is the Lorentz invariant integration measure (for massless particles considered in this work \mbox{$p = \sqrt{p_x^2+p_y^2+p_z^2}$}). Using the standard definition of the particle number current we find
\begin{eqnarray}
N^\mu = \int dP\, p^\mu f = n \,U^\mu, \quad N^\mu_{\rm eq} = \int dP\, p^\mu f_{\rm eq}
= n_{\rm eq}\, U^\mu,
\end{eqnarray}
and
\begin{eqnarray}
D n + n \theta = \frac{1}{\tau_{\rm eq}} \left(
n_{\rm eq} - n \right),
\label{zm2}
\end{eqnarray}
where $\theta$ is the expansion parameter defined in (\ref{volexpp}). We note that there are no terms proportional to the four-vectors $X^\mu$, $Y^\mu$ or $Z^\mu$ in the expansion of the current $N^\mu$. This is due to the quadratic dependence of the distribution function (\ref{fxi}) on these four-vectors. Dividing (\ref{zm2}) by $n$ we may further rewrite the zeroth moment equation as
\begin{eqnarray}
D \ln n +
\theta = \frac{1}{\tau_{\rm eq}}
\left( \frac{n_{\rm eq}}{n}-1\right).
\label{zm3}
\end{eqnarray}

The particle number density $n$ calculated for the anisotropic distribution function (\ref{fxi}) equals
\begin{eqnarray}
n(\lambda,\xi) =
\frac{8\pi k \lambda^3}{\sqrt{1+\xi_X} \sqrt{1+\xi_Y}\sqrt{1+\xi_Z}}.
\label{n}
\end{eqnarray}
On the left-hand side of (\ref{n}) we use the short-hand notation, $\xi$, to denote three anisotropy parameters $\xi_X$, $\xi_Y$, and $\xi_Z=-\xi_X-\xi_Y$. In equilibrium, the expression for the particle number density simplifies to
\begin{eqnarray}
n_{\rm eq}(T) = 8\pi k T^3.
\label{neq}
\end{eqnarray}

\section{First moment of kinetic equation}
\label{sect:1mom}

\subsection{Energy-momentum conservation}
\label{sect:enmomcon}

The first moment of the kinetic equation (\ref{RTA}) reads
\begin{eqnarray}
\int\!\! dP \; p^\nu p\cdot\partial f = \frac{1}{\tau_{\rm eq}} \int\!\! dP \,p^\nu \, p\cdot U
\left( f_{\rm eq} -f \right).
\label{fm1}
\end{eqnarray}
With the energy-momentum tensors defined by the second moments of the distribution functions,
\begin{eqnarray}
T^{\mu\nu} = \int dP p^\mu p^\nu f, \quad T^{\mu\nu}_{\rm eq} = \int dP p^\mu p^\nu f_{\rm eq},
\label{Tmunus}
\end{eqnarray}
we may rewrite Eq.~(\ref{fm1}) as
\begin{eqnarray}
\partial_\mu T^{\mu\nu} = \frac{1}{\tau_{\rm eq}} \left( U_\mu T^{\mu\nu}_{\rm eq} - U_\mu T^{\mu\nu} \right).
\label{fm2}
\end{eqnarray}
Since we want to conserve energy and momentum in the system, the left-hand side of Eq.~(\ref{fm2}) must vanish
\begin{eqnarray}
\partial_\mu T^{\mu\nu} = 0.
\label{enmomcon}
\end{eqnarray}
This leads us to the conclusion that the first-moment equations (\ref{fm1}) and (\ref{fm2}) are satisfied only if the Landau matching condition is satisfied
\begin{eqnarray}
U_\mu T^{\mu\nu}_{\rm eq} = U_\mu T^{\mu\nu}.
\label{LM1}
\end{eqnarray}

The form of the distribution function (\ref{fxi}) implies that the energy-momentum tensor of the anisotropic system has the structure
\begin{eqnarray}
T^{\mu \nu} = \varepsilon\, U^\mu U^\nu + P_X X^\mu X^\nu + P_Y Y^\mu Y^\nu + P_Z Z^\mu Z^\nu
= \varepsilon\, U^\mu U^\nu + \sum_I P_I I^\mu I^\nu,
\label{Tmunu}
\end{eqnarray}
where $\varepsilon$ is the energy density, while $P_X, P_Y$ and $P_Z$ are three different pressure components. In the local rest frame the energy-momentum tensor has the diagonal structure,
\begin{equation}
T^{\mu \nu} = \left(
\begin{array}{cccc}
\varepsilon & 0 & 0 & 0 \\
0 & P_X & 0 & 0 \\
0 & 0 & P_Y & 0 \\
0 & 0 & 0 & P_Z
\end{array} \right).
\label{Tmunuarray}
\end{equation}
In local equilibrium, $\varepsilon = \varepsilon_{\rm eq}$ and the three pressures become equal, $P_X = P_Y = P_Z = P_{\rm eq} = \varepsilon/3 $. Hence, the equilibrium energy-momentum tensor has the expected form
\begin{eqnarray}
T^{\mu \nu}_{\rm eq} &=&
\varepsilon_{\rm eq} U^\mu U^\nu + P_{\rm eq} X^\mu X^\nu + P_{\rm eq} Y^\mu Y^\nu + P_{\rm eq} Z^\mu Z^\nu, \nonumber \\
&=& \varepsilon_{\rm eq} U^\mu U^\nu - P_{\rm eq}
\Delta^{\mu\nu} = \left(\varepsilon_{\rm eq} +
P_{\rm eq} \right) U^\mu U^\nu - P_{\rm eq} g^{\mu\nu}.
\label{Tmunueq}
\end{eqnarray}
The use of the expressions (\ref{Tmunu}) and (\ref{Tmunueq}) in the Landau matching condition (\ref{LM1}) leads directly to the two equations
\begin{eqnarray}\label{p_e_d}
\varepsilon U^\mu = \varepsilon_{\rm eq} U^\mu, \quad \quad \varepsilon = \varepsilon_{\rm eq}.
\end{eqnarray}
We thus see that the Landau matching condition implies simply that the energy density of the system should be equal to the energy density of the thermal background. This requirement allows us to determine the effective temperature $T$ appearing in the thermal distribution $f_{\rm eq}$.

For boost-invariant and cylindrically symmetric systems only two out of four equations appearing in the conservation laws (\ref{enmomcon}) are independent. They are the same as those derived in Ref. \cite{Florkowski:2011jg} and may be written in the compact form as
\begin{eqnarray}
D \varepsilon + \varepsilon \, \theta - \sum_I P_I \theta_I = 0
\label{enmom1}
\end{eqnarray}
and
\begin{eqnarray}
&& \left( X\cdot\partial \right) P_X + P_X \left( \partial\cdot X \right) - \varepsilon \left( X\cdot DU \right) - P_Y \left[ X\cdot\left( Y\cdot\partial \right)Y \right] - P_Z \left[ X\cdot\left( Z\cdot\partial \right)Z \right] = 0. \label{enmom2}
\end{eqnarray}
See Sec.~\ref{sect:explicitr} for the explicit formulas of the derivatives appearing in (\ref{enmom1}) and (\ref{enmom2}).

The standard dissipative hydrodynamics is based on the gradient expansion around the isotropic background. In this case, one usually considers small deviations from the equilibrium values. From this point of view it is interesting and useful to consider the close-to-equilibrium limit of our framework. Therefore, we introduce deviations from the equilibrium pressure, $\pi_I$'s, defined by the relations
\begin{equation}\label{pi_I}
P_X = P_{\rm eq} + \pi_X,
\qquad P_Y = P_{\rm eq} + \pi_Y,
\qquad P_Z = P_{\rm eq} + \pi_Z.
\end{equation}
The sum of the pressure deviations is equal to zero
\begin{eqnarray}
\sum_I \pi_I = \pi_X + \pi_Y + \pi_Z = 0.
\label{sumpiI}
\end{eqnarray}
The equilibrium pressure $P_{\rm eq}$ is one third of the energy density, $P_{\rm eq}=\varepsilon/3$. Changing from $P_I$'s to $\pi_I$'s we rewrite Eq.~(\ref{enmom1}) in the equivalent forms
\begin{equation}\label{alenmom2}
D\varepsilon + \frac{4}{3}\varepsilon\,\theta - \sum_I \pi_I\theta_I = 0, \qquad D\ln\varepsilon = -\frac{4}{3}\theta + \sum_I \frac{\pi_I}{\varepsilon}\,\theta_I.
\end{equation}

\subsection{Close-to-equilibrium behavior}
\label{sect:closetoeq}

In order to find the pressure deviations $\pi_I$'s in the close-to-equilibrium limit, we expand the anisotropic distribution function around the thermal background,
\begin{equation}
f \simeq f_{\rm eq} \left( 1 + \frac{\lambda - T}{ T^2 } (p\cdot U) - \frac{ \xi_X (p\cdot X)^2 + \xi_Y (p\cdot Y)^2 + \xi_Z (p\cdot Z)^2 }{2T (p\cdot U)} \right).
\end{equation}
Here, we neglect higher order contributions in $\xi_I$'s and in the difference $\lambda-T$. Then, the energy-momentum tensor reads
\begin{eqnarray}
T^{\mu\nu} &\simeq& T^{\mu\nu}_{\rm eq} + 96\, \pi\,k\,T^3\,U^\mu U^\nu \left( \frac{}{} \lambda - T \right)- 32\,\pi\,k\,T^3\,\Delta^{\mu\nu}\left( \frac{}{} \lambda - T \right)
\nonumber \\
&& -\frac{32\, \pi}{5}k\, T^4 \left( \frac{}{} \xi_X X^\mu X^\nu + \xi_Y Y^\mu Y^\nu + \xi_Z Z^\mu Z^\nu \right).
\end{eqnarray}
Using the Landau matching~(\ref{p_e_d}) we find that $\lambda=T$ in the leading order. Hence, the energy-momentum tensor in the leading order reads
\begin{equation}
T^{\mu\nu} \simeq T^{\mu\nu}_{\rm eq} -\frac{32 \pi}{5}k\, T^4 \left( \frac{}{} \xi_X X^\mu X^\nu + \xi_Y Y^\mu Y^\nu + \xi_Z Z^\mu Z^\nu \right).
\end{equation}
This expression helps us to identify directly the pressure corrections:
\begin{equation}\label{shear-eq1}
\pi_X \simeq - \frac{ 32 \pi k T^4}{5}\xi_X,
\qquad \pi_Y \simeq - \frac{32 \pi k T^4}{5}\xi_Y,
\qquad
\pi_Z \simeq - \frac{32 \pi k T^4}{5}\xi_Z.
\end{equation}
It is interesting to observe that the pressure corrections are directly proportional to the anisotropy parameters.

\section{Energy density and anisotropic pressure}
\label{sect:enedenaniso}

The energy density for the anisotropic distribution (\ref{fxi}) may be obtained from the contraction of the energy-momentum tensor with the four-vector $U$, namely, $\varepsilon = U_\mu U_\nu T^{\mu\nu}$. This gives
\begin{eqnarray}
\varepsilon(\lambda,\xi) = 24 \pi k \lambda^4 {\cal R}(\xi),
\label{eps1}
\end{eqnarray}
where the function ${\cal R}(\xi)$ is defined by the integral (for details see Sec.~\ref{sect:R})
\begin{equation}\label{R}
{\cal R}(\xi) = \frac{1}{4\pi \sqrt{ \prod_J(1 + \xi_J) } }\int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} }.
\end{equation}
In the case of local equilibrium we have
\begin{eqnarray}
\varepsilon_{\rm eq}(T) = 24 \pi k T^4.
\label{epseq1}
\end{eqnarray}
This leads us to the equivalent formulation of the Landau matching condition (\ref{LM1}) in the form
\begin{eqnarray}
T^4 = \lambda^4 {\cal R}(\xi),
\label{LM2}
\end{eqnarray}
which resembles the condition derived first in \cite{Martinez:2010sc}. It is important to notice, however, that our definitions of the parameters $\lambda$ and $\xi$ are different from the definitions of the parameters $\Lambda$ and $\xi$ used in \cite{Martinez:2010sc}~\footnote{The use of a single parameter $\xi$ in \cite{Martinez:2010sc} corresponds to the use of the non-zero parameter $\zeta_Z$ in our approach with the constraint $\zeta_X=\zeta_Y=0$. Our triplet of $\xi_I$'s satisfies the condition (\ref{sumofxis}). By the way, one can use the condition (\ref{sumofxis}) to check that the expansion of the function ${\cal R}(\xi)-1$ around zero has only quadratic terms in $\xi_I$'s. This is in agreement with the leading order result $\lambda=T$.}.

Similarly to the energy density, the three components of anisotropic pressure may be obtained from three contractions of the energy-momentum tensor with the four-vectors $X$, $Y$, and $Z$, namely, $P_X = X_\mu X_\nu T^{\mu\nu}$, $P_Y = Y_\mu Y_\nu T^{\mu\nu}$, and $P_Z = Z_\mu Z_\nu T^{\mu\nu}$. This leads to the formula
\begin{eqnarray}
P_I(\lambda,\xi) = 24 \pi k \lambda^4 {\cal H}_I (\xi),
\label{P_I}
\end{eqnarray}
where the functions ${\cal H}_I$ may be obtained by differentiation of the function ${\cal R}$ (the definitions of the functions ${\cal H}_I$ as integrals, which lead directly to (\ref{H_I}), are given also in Sec.~\ref{sect:R})
\begin{eqnarray}\nonumber
{\cal H}_I &=& -\frac{ 2\left( 1+ \xi_I \right) }{\sqrt{\prod_J (1 + \xi_J)}} \partial_{\xi_I}\left[ \sqrt{\prod_J (1 + \xi_J)}\;{\cal R} \right] \\
&=& -2\left( 1+ \xi_I \right){\cal R} \, \partial_{\xi_I}\left\{ \ln \left[ \sqrt{\prod_J (1 + \xi_J)} \; {\cal R} \right] \right\}
\label{H_I} \\ \nonumber
&=& -{\cal R} -2{\cal R}\left( 1 + \xi_I \right) \partial_{\xi_I} \left[ \ln\left( \frac{}{} {\cal R} \right) \right]	.
\end{eqnarray}
Since we consider a system of massless particles, $\varepsilon = P_X +P_Y + P_Z$ and the functions ${\cal H}_I$ satisfy the constraint
\begin{equation}
\sum_I {\cal H}_I = {\cal R}.
\label{sumH_I}
\end{equation}

\section{Second moment of kinetic equation}
\label{sect:2mom}

The second moment of the Boltzmann equation may be written in the form analogous to Eq.~(\ref{fm2}),
\begin{eqnarray}
\partial_\lambda \Theta^{\lambda\mu\nu} = \frac{1}{\tau_{\rm eq}} \left(U_\lambda\Theta_{\rm eq}^{\lambda\mu\nu} - U_\lambda\Theta^{\lambda\mu\nu}\right),
\label{tmom}
\end{eqnarray}
where
\begin{eqnarray}
\Theta^{\lambda\mu\nu} = \int\!\! dP \; p^\lambda p^\mu p^\nu f, \quad
\Theta^{\lambda\mu\nu}_{\rm eq} = \int\!\! dP \; p^\lambda p^\mu p^\nu f_{\rm eq}.
\label{Thetas}
\end{eqnarray}
The only non-vanishing terms in (\ref{Thetas}) are those with an even number of each spatial index. In the covariant form they read
\begin{eqnarray}
\Theta &=& \Theta_U \left[ U\otimes U \otimes U\right]
\nonumber \\
&& \,+\, \Theta_X \left[ U\otimes X \otimes X +X\otimes U \otimes X + X\otimes X \otimes U\right]
\nonumber \\
&& \,+\, \Theta_Y \left[ U\otimes Y \otimes Y +Y\otimes U \otimes Y + Y\otimes Y \otimes U\right]
\nonumber \\
&& \,+\, \Theta_Z \left[ U\otimes Z \otimes Z +Z\otimes U \otimes Z + Z\otimes Z \otimes U\right].
\label{Theta}
\end{eqnarray}
Due to the mass-shell condition $p^2 = m^2 = 0$, the coefficients in the expansion (\ref{Theta}) are not independent. One may check that
\begin{eqnarray}
\Theta_X + \Theta_Y + \Theta_Z = \Theta_U.
\label{ThetaU}
\end{eqnarray}
This and other tensor identities may be most easily checked in the local rest frame. A similar argument holds for the projections of $\Theta_{\rm eq}^{\lambda \mu\nu}$. In addition, due to the rotation invariance of the equilibrium distribution we have
\begin{eqnarray}
\Theta^{\rm eq}_X = \Theta^{\rm eq}_Y = \Theta^{\rm eq}_Z = \Theta_{\rm eq}\,.
\label{Thetaeq}
\end{eqnarray}
Out of the ten independent equations in (\ref{tmom}) five are trivial $0=0$ equations. They correspond to the contractions of (\ref{tmom}) with $U\otimes Y$, $U\otimes Z$, $X\otimes Y$, $X\otimes Z$, and $Y\otimes Z$. The contraction with $U\otimes U$ may be represented as a linear combination of the contractions with $X\otimes X$,
$Y\otimes Y$, and $Z\otimes Z$. As a consequence, we deal with four independent contractions, namely, with $U\otimes X$, $X\otimes X$, $Y\otimes Y$, and $Z\otimes Z$. The contraction of (\ref{tmom}) with $U\otimes X$ gives
\begin{eqnarray}
D \left(\Theta_U + 2 \Theta_X \right) +
\left( X\cdot\partial \right) \Theta_X =
\frac{\sinh\theta_\perp}{\tau} \left( \Theta_Z - \Theta_X \right) +
\frac{\cosh\theta_\perp}{r} \left( \Theta_Y - \Theta_X \right),
\label{tmomUX}
\end{eqnarray}
with $X\otimes X$
\begin{eqnarray}
D \Theta_X + \Theta_X \left( \theta - 2\theta_X \right) = \frac{1}{\tau_{\rm eq}} \left(\Theta_{\rm eq} - \Theta_X \right),
\label{tmomXX}
\end{eqnarray}
with $Y\otimes Y$
\begin{eqnarray}
D \Theta_Y + \Theta_Y \left( \theta - 2\theta_Y \right) = \frac{1}{\tau_{\rm eq}} \left(\Theta_{\rm eq} - \Theta_Y \right),
\label{tmomYY}
\end{eqnarray}
and, finally, with $Z\otimes Z$
\begin{eqnarray}
D \Theta_Z + \Theta_Z \left( \theta - 2\theta_Z \right) = \frac{1}{\tau_{\rm eq}} \left(\Theta_{\rm eq} - \Theta_Z \right).
\label{tmomZZ}
\end{eqnarray}

\section{Selection of equations of motion - matching with Israel-Stewart theory}
\label{sect:set}

In the considered model we have five independent parameters (more precisely, five scalar functions of the proper time, $\tau$, and the transverse distance, $r$). These are: the momentum scale, $\lambda$, the effective temperature, $T$, the transverse rapidity, $\theta_\perp$, and two independent anisotropy parameters, for example, $\xi_X$ and $\xi_Y$. The two nontrivial equations from the first moment of the Boltzmann equation, Eqs.~(\ref{enmom1}) and (\ref{enmom2}), as well as the Landau matching condition~(\ref{LM2}) ensure local energy and momentum conservation. Therefore, these three equations should be definitely included in the computational scheme of anisotropic hydrodynamics.

The problem arises which equations should be taken into account in addition to the first-moment equations. We need two extra equations and they should be selected out of Eqs.~(\ref{zm3}), (\ref{tmomUX}), (\ref{tmomXX}), (\ref{tmomYY}), and (\ref{tmomZZ}). An important requirement for our approach is that it must agree with the Israel-Stewart approach in the close-to-equilibrium limit. In this case, the pressure corrections satisfy the three symmetric equations~\footnote{Due to the conditions $\sum_I \pi_I =0$ and $\sum_I \sigma_I =0$ only two out of three equations in (\ref{2_ord_visc}) are independent.}
\begin{equation}
\tau_\pi D\pi_I + \pi_I = 2 \eta \sigma_I + F_\eta \,\pi_I,
\label{2_ord_visc}
\end{equation}
where \cite{Muronga:2003ta}
\begin{eqnarray}
F_\eta &=& - \eta T \partial \cdot \left( \frac{\alpha_1}{2 T} U \right) .
\label{F}
\end{eqnarray}
In Eqs.~(\ref{2_ord_visc}) and (\ref{F}) the quantity $\tau_\pi$ is the relaxation time for the shear viscous corrections $\pi_I$, $\eta$ is the shear viscosity, and $\alpha_1$ is one of the kinetic coefficients appearing in second order hydrodynamics~\cite{Israel:1979wp}. The symmetric form of the three equations appearing in (\ref{2_ord_visc}) suggests that one should use Eqs.~(\ref{tmomXX}), (\ref{tmomYY}), and (\ref{tmomZZ}) as a starting point for possible generalizations of (\ref{2_ord_visc}) to the case of high pressure anisotropy. The use of the zeroth moment equation combined with one of the equations obtained from the second moment leads to asymmetric treatment of different anisotropies, which contradicts the symmetric form of Eqs.~(\ref{2_ord_visc}).

In the remaining part of this Section we show that two linear combinations of Eqs.~(\ref{tmomXX})--(\ref{tmomZZ}) provide indeed a system of equations which agree with Eqs.~(\ref{2_ord_visc}) in the close-to-equilibrium limit. At first, it is useful to take advantage of the fact that $\Theta_I$'s are positive,
\begin{equation}
\Theta_I = \int\!\! dP \, p \, (p\cdot I)^2 f = \frac{ 32 \, \pi \, k \, \lambda^5 }{ \sqrt{ \prod_J(1+\xi_J) } } \frac{1}{1+\xi_I},
\label{Theta_I}
\end{equation}
\begin{equation}
\Theta_{\rm eq} = \int\!\! dP \, p \, (p\cdot I)^2 f_{\rm eq} = { 32 \, \pi \, k \, T^5 } \qquad (I=X,Y,Z).
\label{Theta_eq}
\end{equation}
Then, we rewrite Eqs.~(\ref{tmomXX})--(\ref{tmomZZ}) dividing each of them first by $\Theta_I$. In this way we obtain
\begin{eqnarray}
D \ln\Theta_I + \theta - 2\theta_I = \frac{1}{\tau_{\rm eq}} \left[ \frac{\Theta_{\rm eq}}{\Theta_I} - 1 \right].
\label{(I)/I}
\end{eqnarray}
In the next step, we define the two desired equations by taking Eqs.~(\ref{(I)/I}) for $I=X$ and $I=Y$, and by subtracting one third of the sum of Eqs.~(\ref{(I)/I}) from these two equations
\begin{eqnarray}
D \ln\Theta_I + \theta - 2\theta_I -\frac{1}{3}\sum_J \left[\frac{}{} D \ln\Theta_J + \theta - 2\theta_J\right] = \frac{1}{\tau_{\rm eq}} \left[ \frac{\Theta_{\rm eq}}{\Theta_I} - 1 \right] -\frac{1}{3}\sum_J \left\{ \frac{1}{\tau_{\rm eq}} \left[ \frac{\Theta_{\rm eq}}{\Theta_J} - 1 \right] \right\} \quad (I=X,Y).
\label{sum}
\end{eqnarray}
Of course, other choices of the two indices $I$ are also possible. The very important feature of our strategy is that fulfilling Eqs.~(\ref{sum}) for arbitrary two indices implies that the same equation is fulfilled for the remaining third index. To demonstrate this property, we first make use of Eqs.~(\ref{Theta_I}) and (\ref{Theta_eq}) to rewrite Eqs.~(\ref{sum}) in the simpler form as
\begin{equation}
\frac{D\xi_I}{1+\xi_I}
-\frac{1}{3}\sum_J\frac{D\xi_J}{1+\xi_J}
+ 2\sigma_I + \frac{\xi_I}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } = 0 \quad (I=X,Y).
\label{explicit-sum2}
\end{equation}
If Eq.~(\ref{explicit-sum2}) is fulfilled for $I=X$ and $I=Y$, the same equation holds for $I=Z$. Indeed, if we use the properties $\sigma_Z = -\sigma_X - \sigma_Y$ and $\xi_Z = -\xi_X - \xi_Y$, then the straightforward calculation shows
\begin{eqnarray}\nonumber
&& \frac{D\xi_Z}{1+\xi_Z}
-\frac{1}{3}\sum_J\frac{D\xi_J}{1+\xi_J}
+ 2\sigma_Z + \frac{\xi_Z}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } \\\nonumber
&& = \frac{D\xi_Z}{1+\xi_Z}
-\frac{1}{3}\sum_J\frac{D\xi_J}{1+\xi_J}
+ \left[ -2 \sigma_X -\frac{\xi_X}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } \right] + \left[ -2 \sigma_Y -\frac{\xi_Y}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } \right] \\
&& = \frac{D\xi_Z}{1+\xi_Z} + \frac{D\xi_X}{1+\xi_X} + \frac{D\xi_Y}{1+\xi_Y} - \sum_J\frac{D\xi_J}{1+\xi_J} \equiv 0.
\end{eqnarray}

Close to equilibrium, the anisotropy parameters $\xi_I$ are proportional to the pressure corrections $\pi_I$. Using Eqs.~(\ref{p_e_d}), (\ref{shear-eq1}), and (\ref{epseq1}), we find
\begin{equation}\label{shear-eq2}
\xi_X \simeq -\frac{15}{4} \frac{ \pi_X}{ \varepsilon }, \qquad \xi_Y \simeq -\frac{15}{4} \frac{ \pi_Y}{ \varepsilon }, \qquad \xi_Z \simeq -\frac{15}{4} \frac{ \pi_Z}{ \varepsilon }.
\end{equation}
Since $\xi_X + \xi_Y + \xi_Z =0$ and $\lambda=T$ up to quadratic terms in the anisotropy parameters, Eqs.~(\ref{explicit-sum2}) read
\begin{equation}\label{IS-equivalent-xi}
D\xi_I + 2\sigma_I + \frac{\xi_I}{\tau_{\rm eq}} \simeq 0.
\end{equation}
Using Eqs.~(\ref{shear-eq2}) and multiplying the last equation by $-4\varepsilon/15$ we obtain
\begin{equation}\label{IS-equivalent}
D\pi_I -\pi_I D\ln\varepsilon - 2 \left( \frac{4}{15}\, \varepsilon \right)\sigma_I + \frac{\pi_I}{\tau_{\rm eq}} \simeq 0,
\end{equation}
or
\begin{equation}\label{IS-equivalent2}
\tau_{\rm eq} D\pi_I + \pi_I = 2 \left( \frac{4}{15}\, \varepsilon \, \tau_{\rm eq}\right) \sigma_I +\tau_{\rm eq}\, \pi_I \, D\ln\varepsilon.
\end{equation}
The two terms on the left-hand side of (\ref{IS-equivalent2}) and the first term on the right-hand side of (\ref{IS-equivalent2}) agree with the corresponding terms in Eq.~(\ref{2_ord_visc}) provided we connect the relaxation time $\tau_\pi$ and the shear viscosity $\eta$ with the relaxation time $\tau_{\rm eq}$ and the energy density $\varepsilon$ by the expressions
\begin{equation}\label{transport_coefficients}
\tau_\pi = \tau_{\rm eq}, \qquad \eta = \frac{4}{15} \, \varepsilon \, \tau_{\rm eq}.
\end{equation}
Finally, one may find the value of the coefficient $\alpha_1$ by comparing the last term of Eq.~(\ref{IS-equivalent2}) with the expression for $F_\eta \pi_I$. Using (\ref{F}), one finds

\begin{eqnarray}
\pi_I F_\eta &=&
- \pi_I \eta T \, \partial \cdot \left( \frac{\alpha_1}{2 T} U \right) = \pi_I \left[ -\partial\cdot\left(\frac{}{}\eta\alpha_1\, U\right) + \eta\alpha_1D\ln\left(\frac{}{}\eta T\right) \right] = \\ \nonumber
&=& \pi_I \left[ -\left( \eta\alpha_1 \right)\theta - D\left( \eta\alpha_1 \right) +\left(\eta\alpha_1\right)D\ln\left(\frac{}{}\eta T\right) \right]
\end{eqnarray}
With the relaxation time $\tau_\pi$ and the viscosity $\eta$ identified through Eq.~(\ref{transport_coefficients}) and also using Eqs.~(\ref{p_e_d}), (\ref{alenmom2}), and (\ref{epseq1}) we obtain
\begin{eqnarray}\nonumber
\pi_I F_\eta &=&
\pi_I \left[ \frac{3}{4}\left(\eta\alpha_1\right)D\ln\varepsilon -\frac{3}{4}\left(\eta\alpha_1\right) \sum_J \frac{\pi_J}{\varepsilon}\theta_J - D\left(\eta\alpha_1\right) +\frac{5}{4}\left(\eta\alpha_1\right)D\ln\varepsilon + \left(\eta\alpha_1\right) D\ln\tau_\pi \right] = \\
&=& \pi_I \left[ 2\left(\eta\alpha_1\right) D\ln\varepsilon +\left(\eta\alpha_1\right)D\ln\tau_\pi -D\left(\eta\alpha_1\right) \right]
\end{eqnarray}
Here the term containing $\sum_J \pi_J \theta_J$ has been neglected, since after multiplication by $\pi_I$ it is quadratic in the pressure corrections; the remaining part equals the last term in Eq.~(\ref{IS-equivalent2}) when

\begin{equation}
\eta\alpha_1 = \frac{1}{2} \tau_\pi \qquad \hbox{or} \qquad \alpha_1=\frac{\tau_\pi}{2\eta},
\end{equation}
which is consistent with the Israel-Stewart theory~\cite{Muronga:2001zk}.

\section{Entropy source}
\label{sect:ent}

An important verification of our scheme based on Eqs.~(\ref{enmom1}), (\ref{enmom2}), (\ref{LM2}), and (\ref{explicit-sum2}) is checking if it leads to the positive-definite entropy source. Using the Boltzmann definition of the entropy current, we find that it is proportional to the particle density
\begin{eqnarray}
\sigma^\mu = \sigma U^\mu = 4 n U^\mu,
\label{entcur}
\end{eqnarray}
where $n$ is defined by Eq.~(\ref{n}), hence
\begin{eqnarray}
\sigma(\lambda,\xi) =
\frac{32\pi k \lambda^3}{\sqrt{1+\xi_X} \sqrt{1+\xi_Y}\sqrt{1+\xi_Z}}.
\label{sigma}
\end{eqnarray}
Combining this equation with the formula for the energy density we find
\begin{equation}\label{eps2}
\varepsilon = 24\, \pi \, k \left( \frac{ \sigma }{32\, \pi \, k} \sqrt{ \prod_I \left( 1 +\xi_I \right) } \right)^{\frac{4}{3}} {\cal R} = \frac{3}{8} \left( 4\, \pi \, k \right)^{-\frac{1}{3}} \sigma^{\frac{4}{3}} \left[ \prod_J \left( 1 + \xi_J \right) \right]^{\frac{2}{3}} {\cal R}.
\end{equation}
Substituting this equation into Eq.~(\ref{enmom1}) gives
\begin{equation}
\frac{4}{3} \left( \frac{}{} D\ln\sigma + \theta \right) +\frac{2}{3}\sum_I \frac{ D \xi_I }{ 1 + \xi_I } + D\ln {\cal R} -\sum_I \frac{ \pi_I }{\varepsilon} \theta_I = 0.
\label{entr_sour_1}
\end{equation}
We note that the expression in the bracket on the left-hand side of Eq.~(\ref{entr_sour_1}) is proportional to the entropy source
\begin{eqnarray}
\Sigma = \partial_\mu \sigma^\mu =
\partial_\mu \left(\sigma U^\mu \right) = D \sigma + \sigma \theta.
\end{eqnarray}

We shall express now the last two terms in (\ref{entr_sour_1}) in terms of the functions ${\cal R}$ and ${\cal H}_I$. From Eq.~(\ref{H_I}) we calculate the $\xi_I$ derivative of $\ln({\cal R})$
\begin{equation}\label{derR}
\partial_{\xi_I} \left[ \ln\left( \frac{}{} {\cal R} \right) \right] = -\frac{1}{2\left( 1 +\xi_I \right)}\left[ 1 + \frac{ {\cal H}_I }{ {\cal R} } \right].
\end{equation}
Hence, the convective derivative $D\ln({\cal R})$ reads
\begin{equation}\label{DlnR}
D\ln\left( \frac{}{} {\cal R} \right) = \sum_I D\xi_I \, \partial_{\xi_I} \left[ \ln\left( \frac{}{} {\cal R} \right) \right] = -\frac{1}{2} \sum_I \left[ 1 +\frac{ {\cal H}_I }{ {\cal R} } \right] \frac{ D\xi_I }{ 1 +\xi_I }.
\end{equation}
On the other hand, using definitions of the pressure corrections~(\ref{pi_I}) and of the functions ${\cal H}_I$, we find a useful expression for the $\pi/\varepsilon$ ratio
\begin{equation}
\frac{\pi_I}{\varepsilon} = -\frac{1}{3}\left[ 1 -3 \frac{ {\cal H}_I }{ {\cal R} } \right]
\label{pi_I/eps}
\end{equation}
and
\begin{equation}
\sum_I \frac{\pi_I}{\varepsilon} \theta_I = -\frac{1}{3}\sum_I \left[ 1 -3 \frac{ {\cal H}_I }{ {\cal R} } \right] \left( \frac{1}{3}\theta + \theta_I -\frac{1}{3}\theta \right) = -\frac{1}{6} \sum_I \left[ 1 -3 \frac{ {\cal H}_I }{ {\cal R} } \right] 2\sigma_I.
\label{sumpi_I/eps}
\end{equation}
Here we replaced the components of the expansion tensor $\theta_I$ by the components of the shear tensor $\sigma_I$.

Using Eqs.~(\ref{derR})--(\ref{sumpi_I/eps}), which are exact and do not refer to the small anisotropy limit, we may write
\begin{equation}\label{entr_sour_2}
\frac{4}{3} \, \frac{ \partial_\mu \sigma^\mu }{ \sigma } +\frac{1}{6}\sum_I \left[ 1 - 3\frac{ {\cal H}_I }{ {\cal R} } \right] \frac{ D \xi_I }{ 1 + \xi_I } +\frac{1}{6}\sum_I \left[ 1 - 3\frac{ {\cal H}_I }{ {\cal R} } \right] 2 \sigma_I = 0,
\end{equation}
or, equivalently,
\begin{equation}\label{entr_sour_3}
\frac{ \partial_\mu \sigma^\mu }{ \sigma } = - \sum_I\left[ \frac{1}{8} - \frac{3}{8} \frac{ {\cal H}_I }{ {\cal R} } \right] \left( \frac{ D\xi_I }{ 1 + \xi_I } + 2 \sigma_I \right).
\end{equation}
Using now Eqs.~(\ref{LM2}), (\ref{sumH_I}), and (\ref{explicit-sum2}) we find
\begin{equation}\label{entr_sour_4}
\frac{ \partial_\mu \sigma^\mu }{ \sigma } = \frac{1}{\tau_{\rm eq}}{\cal R}^{\frac{5}{4}}\sqrt{\prod_J\left( 1 + \xi_J \right)} \, \sum_I\left[ \frac{1}{8} - \frac{3}{8} \frac{ {\cal H}_I }{ {\cal R} } \right] \xi_I = -\frac{3}{ 8 \tau_{\rm eq} }{\cal R}^{\frac{5}{4}}\sqrt{\prod_J\left( 1 + \xi_J \right)} \, \sum_I\frac{ {\cal H}_I }{ {\cal R} }\xi_I \ge 0.
\end{equation}
The last inequality has been checked numerically in the allowed range of the parameters $\xi_X$ and $\xi_Y$, see Eqs.~(\ref{sumofxis}) and (\ref{range}). For small anisotropies we use Eqs.~(\ref{shear-eq2}) and (\ref{pi_I/eps}) to find that
\begin{eqnarray}
\frac{ \partial_\mu \sigma^\mu }{ \sigma } = \frac{1}{10 \tau_{\rm eq}} \sum_I \xi_I^2,
\label{entsorceapp}
\end{eqnarray}
which is again consistent with the Israel-Stewart theory.

\section{Summary and conclusions}
\label{sect:con}

In this paper we have used the projection method for boost-invariant and cylindrically symmetric systems to introduce a new formulation of anisotropic hydrodynamics that allows for three different values of pressure acting in three different directions. Our considerations have been based on the Boltzmann kinetic equation with the collision term treated in the relaxation time approximation. The momentum anisotropy has been included explicitly in the leading term of the distribution function.

A novel feature of our work is the complete analysis of the second moment of the Boltzmann equation, in addition to the zeroth and first moments that have been analyzed in earlier studies. The framework of anisotropic hydrodynamics should include five equations for five unknown functions: $\lambda$, $T$, $\theta_\perp$, $\xi_X$ and $\xi_Y$. The first two equations follow from the energy and momentum conservation, Eqs.~(\ref{enmom1}) and (\ref{enmom2}). Their explicit, extended versions are
\begin{eqnarray}
&& \left( \cosh \theta_\perp \partial_\tau
+ \sinh \theta_\perp \partial_r \right)
\varepsilon(\lambda,\xi)
+ \varepsilon(\lambda,\xi) \left[ \cosh \theta_\perp \left( \frac{1}{\tau} + \partial_r \theta_\perp \right) + \sinh \theta_\perp \left(
\frac{1}{r} + \partial_\tau \theta_\perp \right) \right] \nonumber \\
&& + P_X(\lambda,\xi) \left( \cosh \theta_\perp \partial_r \theta_\perp + \sinh \theta_\perp \partial_\tau \theta_\perp \right) + P_Y(\lambda,\xi) \frac{\sinh \theta_\perp}{r} +P_Z(\lambda,\xi) \frac{\cosh \theta_\perp}{\tau} = 0,
\label{fineq1}
\end{eqnarray}
and
\begin{eqnarray}
&& \left( \sinh \theta_\perp \partial_\tau
+ \cosh \theta_\perp \partial_r \right) P_X(\lambda,\xi) + \varepsilon \left( \sinh \theta_\perp \partial_r \theta_\perp
+ \cosh \theta_\perp \partial_\tau \theta_\perp \right) \nonumber \\
&& + P_X(\lambda,\xi) \left[ \sinh \theta_\perp \left( \frac{1}{\tau} + \partial_r \theta_\perp \right) + \cosh \theta_\perp \left(
\frac{1}{r} + \partial_\tau \theta_\perp \right) \right] - P_Y(\lambda,\xi) \frac{\cosh \theta_\perp}{r} - P_Z(\lambda,\xi) \frac{\sinh \theta_\perp}{\tau} = 0. \label{fineq2}
\end{eqnarray}
The main result of the present work is that Eqs.~(\ref{fineq1}) and (\ref{fineq2}) should be supplemented with the two equations obtained from the second moment of the Boltzmann equation
\begin{eqnarray}
&& \frac{1}{1+\xi_I} \left( \cosh \theta_\perp \partial_\tau
+ \sinh \theta_\perp \partial_r \right) \xi_I
-\frac{1}{3}\sum_J\frac{1}{1+\xi_J}
\left( \cosh \theta_\perp \partial_\tau
+ \sinh \theta_\perp \partial_r \right) \xi_J
\nonumber \\
&& + 2\sigma_I + \frac{\xi_I}{\tau_{\rm eq}} \left( \frac{T}{\lambda} \right)^5\sqrt{ \prod_J(1+\xi_J) } = 0 \qquad (I=X,Y).
\label{fineq34}
\end{eqnarray}
The effective temperature appearing in (\ref{fineq34}) should be obtained from the Landau matching condition which, for the sake of convenience, we also repeat here
\begin{eqnarray}
\left(\frac{T}{\lambda}\right)^4 = {\cal R}(\xi).
\label{fineq5}
\end{eqnarray}
The numerical analysis of Eqs.~(\ref{fineq1})--(\ref{fineq5}) is left for a separate study.

\bigskip

{\bf Acknowledgments}: L.T. and W.F. were supported in part by the Polish National Science Center grants with decisions No. DEC-2012/06/A/ST2/00390 and No.
DEC-2012/05/B/ST2/02528, respectively.

\section{Appendix: Explicit formulas for derivatives}
\label{sect:explicitr}

The total time (or convective) derivative, $D = U^\alpha \partial_\alpha = U \cdot \partial$, describes the change of a physical quantity in the local rest frame. In the remaining part of this Section we collect the formulas involving $D$ and other derivatives which are useful in dealing with hydrodynamic equations.

\medskip \noindent Directional derivatives:
\begin{eqnarray}
U \cdot \partial &=&
\cosh\theta_\perp \partial_\tau
+ \sinh\theta_\perp \partial_r,
\quad Y \cdot \partial = \frac{1}{r} \partial_\phi,
\nonumber \\
X \cdot \partial &=& \sinh\theta_\perp \partial_\tau
+ \cosh\theta_\perp \partial_r,
\quad Z \cdot \partial = \frac{1}{\tau} \partial_{\eta_\parallel}.
\label{useful-eqns-1}
\end{eqnarray}

\medskip \noindent Divergences:
\begin{eqnarray}
\partial \cdot U &=&
\cosh\theta_\perp \left(\frac{1}{\tau} + \partial_r \theta_\perp \right)
+ \sinh\theta_\perp \left( \frac{1}{r} + \partial_\tau \theta_\perp \right), \quad
\partial \cdot Y = 0,
\nonumber \\
\partial \cdot X &=&
\sinh\theta_\perp \left(\frac{1}{\tau} + \partial_r \theta_\perp \right)
+ \cosh\theta_\perp \left( \frac{1}{r} + \partial_\tau \theta_\perp \right), \quad
\partial \cdot Z = 0.
\label{useful-eqns-2}
\end{eqnarray}

\medskip \noindent Convective derivatives of $U$, $X$, $Y$, and $Z$:
\begin{eqnarray}
D U = (U \cdot \partial) U &=&
X \left(\cosh\theta_\perp \partial_\tau \theta_\perp + \sinh\theta_\perp \partial_r \theta_\perp \right), \quad D Y = (U \cdot \partial) Y = 0,
\nonumber \\
D X = (U \cdot \partial) X &=&
U \left(\cosh\theta_\perp \partial_\tau \theta_\perp + \sinh\theta_\perp \partial_r \theta_\perp \right), \quad D Z = (U \cdot \partial) Z = 0.
\label{useful-eqns-3}
\end{eqnarray}

\medskip \noindent Directional derivatives of $U$, $X$, $Y$, and $Z$:
\begin{eqnarray}
(X \cdot \partial) U &=&
X \left(\sinh\theta_\perp \partial_\tau \theta_\perp + \cosh\theta_\perp \partial_r \theta_\perp \right), \quad
(X \cdot \partial) Y = 0,
\nonumber \\
(X \cdot \partial) X &=&
U \left(\sinh\theta_\perp \partial_\tau \theta_\perp + \cosh\theta_\perp \partial_r \theta_\perp \right), \quad
(X \cdot \partial) Z = 0,
\label{useful-eqns-4}
\end{eqnarray}
\begin{eqnarray}
(Y \cdot \partial) U &=&
\frac{\sinh\theta_\perp}{r} \, Y, \quad
(Y \cdot \partial) Y = \frac{1}{r}\left( \frac{}{} \sinh\theta_\perp U - \cosh\theta_\perp X \right),
\nonumber \\
(Y \cdot \partial) X &=&
\frac{\cosh \theta_\perp}{r} \, Y, \quad
(Y \cdot \partial) Z =0,
\label{useful-eqns-5}
\end{eqnarray}
\begin{eqnarray}
(Z \cdot \partial) U &=&
\frac{\cosh\theta_\perp}{\tau} \, Z, \quad
(Z \cdot \partial) Y = 0,
\nonumber \\
(Z \cdot \partial) X &=&
\frac{ \sinh\theta_\perp}{\tau} \, Z , \quad
(Z \cdot \partial) Z = \frac{1}{\tau}\left( \frac{}{} \cosh\theta_\perp U - \sinh\theta_\perp X \right).
\label{useful-eqns-6}
\end{eqnarray}

\section{Appendix: Integrals for energy density and pressure}
\label{sect:R}

In order to pass from Eq.~(\ref{entr_sour_1}) to Eq.~(\ref{entr_sour_2}) we need several properties of the function ${\cal R}$ defined in~(\ref{eps1}). They follow most easily from the representation of ${\cal R}$ in the local rest frame,
\begin{eqnarray}\nonumber
\varepsilon &=& \int dP (p\cdot U)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} p^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \\
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } = 24 \pi k \lambda^4 {\cal R}.
\end{eqnarray}
For simplicity of notation we use here the symbol $p^I$ to denote the three-momentum component $p_i$.

\bigskip

Similar expressions may be found for the pressures $P_X$, $P_Y$ and $P_Z$
\begin{eqnarray}
P_X &=& \int dP \, (p\cdot X)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} \left( p^X \right)^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \nonumber \\ \label{H_X}
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \frac{ \cos^2\phi \sin ^2\theta }{\left( 1 + \xi_X \right) \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } } = 24\, \pi \, k \, \lambda^4\; {\cal H}_X.
\end{eqnarray}

\begin{eqnarray}
P_Y &=& \int dP \, (p\cdot Y)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} \left( p^Y \right)^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] \nonumber \\ \label{H_Y}
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \frac{ \sin^2\phi \sin ^2\theta }{\left( 1 + \xi_Y \right) \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } } = 24\, \pi \, k \, \lambda^4\; {\cal H}_Y .
\end{eqnarray}

\begin{eqnarray}
P_Z &=& \int dP \, (p\cdot Z)^2 \, f = k\int \frac{ {\rm d}^3 {\bf p} }{p} \left( p^Z \right)^2 \exp\left[-\frac{1}{\lambda} \sqrt{ \sum_I (p^I)^2 \left( \frac{}{} 1 +\xi_I \right) } \right] = \nonumber \\
\label{H_Z}
&=& \frac{ 6\, k\, \lambda^4 }{ \sqrt{ \prod_I (1 +\xi_I) } } \int_0^{2\pi}\!\!\!\! {\rm d}\phi \int_0^\pi \!\!\! {\rm d}\theta \sin\theta \frac{ \cos^2\theta }{\left( 1 + \xi_Z \right) \sqrt{ \frac{ \cos^2\phi\sin^2\theta }{1+\xi_X} + \frac{ \sin^2\phi\sin^2\theta }{1+\xi_Y} + \frac{ \cos^2\theta }{1+\xi_Z} } } = 24\, \pi \, k \, \lambda^4\; {\cal H}_Z.
\end{eqnarray}
The equations above define the functions ${\cal H}_I$.

\bigskip

\end{document}

\begin{figure}[t]
\begin{center}
\includegraphics[angle=0,width=0.55\textwidth]{Sigma.pdf}
\end{center}
\caption{(Color online) The function $-\sqrt{\Pi_J (1+\xi_J)} \sum_I {\cal H}_I \xi_I$ defining the entropy source in Eq.~(\ref{entr_sour_4}) for $\lambda=1$.}
\label{fig:Sigma}
\end{figure}
\title[Small ranks over function fields]{Jacobi sums, Fermat Jacobians,
\\and ranks of abelian varieties
\\over towers of function fields}

\section{Introduction}

\subsection{}
Given an abelian variety $A$ over a function field $K=k(\mathcal{C})$ with
$\mathcal{C}$ an absolutely irreducible, smooth, proper curve over a field
$k$, it is natural to ask about the behavior of the Mordell-Weil group of $A$ in the layers of a tower of fields over $K$. The simplest case, which is already very interesting, is when $A$ is an elliptic curve, $K=k(t)$ is a rational function field, and one considers the towers $k(t^{1/d})$ or $\overline k(t^{1/d})$ as $d$ varies through powers of a prime or through all integers not divisible by the characteristic of $k$.

When $k=\mathbb{Q}$ or more generally a number field, several authors (e.g.,
\cite{Shioda}, \cite{Stiller}, \cite{Fastenberg}, \cite{Silverman1},
\cite{Silverman2}, and \cite{Ellenberg}) have considered this question and given bounds on the rank of $A$ over $\mathbb{Q}(t^{1/d})$ or
${\overline{\mathbb{Q}}}(t^{1/d})$. In some interesting cases it can be shown that $A$
has rank bounded independently of $d$ in the tower ${\overline{\mathbb{Q}}}(t^{1/d})$.
Of course no example is yet known of an elliptic curve over $\mathbb{Q}(t)$
with unbounded ranks in the tower $\mathbb{Q}(t^{1/d})$, nor of an elliptic curve over ${\overline{\mathbb{Q}}}(t)$ with non-constant $j$-invariant and unbounded ranks in the tower ${\overline{\mathbb{Q}}}(t^{1/d})$.

When $k$ is a finite field, examples of Shioda and the author show that there are non-isotrivial elliptic curves over ${\mathbb{F}_p}(t)$ with unbounded ranks in the towers ${\overline{\mathbb{F}}_p}(t^{1/d})$ \cite{Shioda}*{Remark 10} and ${\mathbb{F}_p}(t^{1/d})$ \cite{UlmerR1}*{1.5}. More recently, the author has shown \cite{UlmerR2} that high ranks over function fields over finite fields are in some sense ubiquitous. For example, for every prime $p$ and every integer $g>0$ there are absolutely simple abelian varieties of dimension $g$ over ${\mathbb{F}_p}(t)$ with unbounded ranks in the tower ${\mathbb{F}_p}(t^{1/d})$, and given any non-isotrivial elliptic curve $E$ over ${\mathbb{F}_q}(t)$, there exists a finite extension ${\mathbb{F}}_{r}(u)$ such that $E$ has unbounded (analytic) ranks in the tower ${\mathbb{F}}_{r}(u^{1/d})$.

One obvious difference between number fields and finite fields which might be relevant here is the complexity of their absolute Galois groups: that of a finite field is pro-cyclic while that of a number field is highly non-abelian. Ellenberg uses this non-abelianness in a serious way in his work on bounding ranks and, in a private communication, he asked whether it might be the case that, say, a non-isotrivial elliptic curve over ${\mathbb{F}_q}(t)$ always has unbounded rank in the tower ${\mathbb{F}_q}(t^{1/d})$.

Our goal in this note, which is a companion to \cite{UlmerR2}, is to give a number of examples of abelian varieties over function fields
${\mathbb{F}_q}(t)$ which have bounded ranks in the towers ${\overline{\mathbb{F}}_q}(t^{1/d})$ as
$d$ ranges through powers of a suitable prime or through all integers not divisible by $p$, the characteristic of ${\mathbb{F}_q}$. We also get some information about ranks in towers $k(t^{1/d})$ for arbitrary fields
$k$. Along the way we prove some new results on Fermat curves which may be of independent interest. The main results are Theorems~\ref{thm:GaussJacobiBounds}, \ref{thm:Jd0}, \ref{thm:Jdp},
\ref{thm:isotrivial}, \ref{thm:nonisol}, and \ref{thm:nonisod}.

\subsection{}
It is a pleasure to thank Jordan Ellenberg for his stimulating questions about ranks of elliptic curves as well as Brian Conrey, Bill McCallum, and Dinesh Thakur for their help. Special thanks are due to Bjorn Poonen for several incisive remarks and for pointing out that some arguments originally given for elliptic curves apply more generally to higher-dimensional abelian varieties.

\section{Jacobi sums}

\subsection{}
Throughout the paper $p$ will be a rational prime number, ${\mathbb{F}_p}=\mathbb{Z}/p\mathbb{Z}$
will be the prime field of characteristic $p$, and $q=p^f$ will be a power of $p$. Fix an algebraic closure ${\overline{\mathbb{Q}}}$ of $\mathbb{Q}$. All number fields considered will tacitly be assumed to be subfields of ${\overline{\mathbb{Q}}}$.
We denote by $\mu_d$ the group of $d$-th roots of unity in ${\overline{\mathbb{Q}}}$.

Let $\mathfrak{p}$ be a prime of $\mathcal{O}_{\overline{\mathbb{Q}}}$, the ring of integers of ${\overline{\mathbb{Q}}}$,
over $p$. The field $\mathcal{O}_{\overline{\mathbb{Q}}}/\mathfrak{p}$ is an algebraic closure of ${\mathbb{F}_p}$
which we denote by ${\overline{\mathbb{F}}_p}$ and we write ${\mathbb{F}_q}$ for its subfield of cardinality $q$.

Reduction modulo $\mathfrak{p}$ induces an isomorphism between the group of all roots of unity of order prime to $p$ in $\mathcal{O}_{\overline{\mathbb{Q}}}$ and the multiplicative group $(\mathcal{O}_{\overline{\mathbb{Q}}}/\mathfrak{p})^\times={\overline{\mathbb{F}}_p}^\times$. We let
$t:{\overline{\mathbb{F}}_p}^\times\to{\overline{\mathbb{Q}}}^\times$ denote the inverse of this isomorphism. We will use the same letter $t$ for the restriction to any of the finite fields ${\mathbb{F}_q^\times}$. Every character of ${\mathbb{F}_q^\times}$ is a power of $t$.

\subsection{}
Fix a non-trivial additive character $\psi_p:{\mathbb{F}_p}\to{\overline{\mathbb{Q}}}^\times$. For each $q$ we define an additive character $\psi_q$ as
$\psi_q=\psi_p\circ\tr_{{\mathbb{F}_q}/{\mathbb{F}_p}}$.

For each $q$ and each character $\chi$ of ${\mathbb{F}_q^\times}$, we define a Gauss sum
$$G_q(\chi)=-\sum_{x\in{\mathbb{F}_q^\times}}\chi(x)\psi_q(x)\in\mathbb{Q}(\mu_{p(q-1)}).$$
It is well known that $G_q(\chi)=1$ if $\chi$ is the trivial character and that $G_q(\chi)$ is an algebraic integer with absolute value
$q^{1/2}$ in every complex embedding if $\chi\neq1$.

For $d$ prime to $q$, $a\in\mathbb{Z}/d\mathbb{Z}$, and any $q\equiv1\pmod d$ we write
$G_q(a)$ for $G_q(t^{-a(q-1)/d})$ which lies in $\mathbb{Q}(\mu_{pd})$. The analysis leading to Stickelberger's theorem \cite{Washington}*{6.2}
shows that if $\wp$ is the prime of $\mathbb{Q}(\mu_{pd})$ under $\mathfrak{p}$,
$q=p^f$, and $a\not\equiv0\pmod{d}$ then
$$\ord_{\wp}G_q(a)=(p-1)\sum_{j=0}^{f-1}
\left\langle\frac{p^ja}{d}\right\rangle$$
where $\langle x\rangle$ is the fractional part of $x$, i.e.,
$0\le\langle x\rangle<1$ and $x-\langle x\rangle\in\mathbb{Z}$.

\subsection{}\label{ss:JacobiSums}
Fix a positive integer $w$. For each $q$ and each tuple of non-trivial characters $\chi_0,\dots,\chi_{w+1}$ of ${\mathbb{F}_q^\times}$ such that the product $\chi_0\cdots\chi_{w+1}$ is trivial, we define a Jacobi sum
$$J_q(\chi_0,\dots,\chi_{w+1})=\frac1{q-1}
\sum_{\substack{x_0,\dots,x_{w+1}\in{\mathbb{F}}_{p^f}^\times\\x_0+\cdots+x_{w+1}=0}}
\chi_0(x_0)\cdots\chi_{w+1}(x_{w+1})\in\mathbb{Q}(\mu_{q-1}).$$

It is well-known and elementary (see \cite{WeilNS}*{p.~501} for example) that
$$J_q(\chi_0,\dots,\chi_{w+1})=\frac{(-1)^w}q\prod_{i=0}^{w+1}G_q(\chi_i).$$
In particular, the Jacobi sum is an algebraic integer with absolute value $q^{w/2}$ in every complex embedding.

Let $A_{d,w}\subset(\mathbb{Z}/d\mathbb{Z})^{w+2}$ be the set of tuples
${\mathbf{a}}=(a_0,\dots,a_{w+1})$ such that $a_i\neq0$ for all $i$ and $\sum a_i=0$. If ${\mathbf{a}}\in A_{d,w}$ and $q\equiv1\pmod d$, we write $J_q({\mathbf{a}})$
for $J_q(t^{-a_0(q-1)/d},\dots,t^{-a_{w+1}(q-1)/d})$; clearly
$J_q({\mathbf{a}})\in\mathbb{Q}(\mu_d)$. If $\wp$ is the prime of $\mathbb{Q}(\mu_{d})$ under
$\mathfrak{p}$ and $q=p^f$, then
$$\ord_\wp J_q({\mathbf{a}})=\sum_{i=0}^{w+1}\sum_{j=0}^{f-1}
\left\langle\frac{p^ja_i}{d}\right\rangle-f.$$

We write $A'_{d,w}$ for those ${\mathbf{a}}\in A_{d,w}$ such that
$\gcd(d,a_0,\dots,a_{w+1})=1$. Note that if ${\mathbf{a}}\in A_{d,w}$ and if
$e=\gcd(d,a_0,\dots,a_{w+1})$, $d'=d/e$ and
${\mathbf{a}}'=(a_0/e,\dots,a_{w+1}/e)\in A'_{d',w}$ then for any $q\equiv1\pmod d$ we have $J_q({\mathbf{a}})=J_q({\mathbf{a}}')$.

Many of our results on ranks will be based on part (2) of the following theorem about the distribution of Gauss and Jacobi sums.
Roughly speaking, it says that sums involving characters of large order must either have large degree over $\mathbb{Q}$ or have valuation bounded away from 0.

\begin{thm}\label{thm:GaussJacobiBounds}
\hfill\break
\begin{enumerate}
\item \vskip-12pt Fix a real number $\epsilon>0$ and a positive integer $n$. There exists a constant $C_{\epsilon,n}$ depending only on $\epsilon$ and $n$ such that if $d>C_{\epsilon,n}$,
$q=p^f\equiv1\pmod d$, $a\in(\mathbb{Z}/d\mathbb{Z})^\times$, and the degree of
$G_q(a)$ over $\mathbb{Q}(\mu_p)$ is $\le n$, then
$$\left|\frac{\ord_\wp G_q(a)}{(p-1)f}-\frac12\right|<\epsilon.$$
Here $\wp$ is the prime of $\mathbb{Q}(\mu_{pd})$ under $\mathfrak{p}$. Note that
$\ord_\wp(q)=(p-1)f$.
\item Fix a positive integer $n$. There exist constants $C_n$ and
$\epsilon_n>0$ depending only on $n$ such that if $d>C_n$,
$q=p^f\equiv1\pmod d$, $w\ge1$, ${\mathbf{a}}\in A'_{d,w}$, and the degree of
$J_q({\mathbf{a}})$ over $\mathbb{Q}$ is $\le n$, then
$$\frac{\ord_\wp J_q({\mathbf{a}})}{f}>\epsilon_n.$$
Here $\wp$ is the prime of $\mathbb{Q}(\mu_d)$ under $\mathfrak{p}$. Note that
$\ord_\wp(q)=f$.
\end{enumerate}
\end{thm}

\begin{rems}
\hfill\break
\begin{enumerate}
\item \vskip-12pt The constants appearing in the theorem are {\it independent of\/} $p$ and effectively computable.
\item In part (2) of the theorem, we may replace ``the degree of
$J_q({\mathbf{a}})$ over $\mathbb{Q}$ is $\le n$'' with ``the degree of the largest subfield of $\mathbb{Q}(J_q({\mathbf{a}}))$ in which $p$ splits completely is $\le n$'' and similarly in part (1). I do not know whether this has any applications to geometry.
\end{enumerate}
\end{rems}

The theorem is a consequence of Stickelberger's theorem and the following simple estimate.

\begin{prop}\label{prop:BasicEstimate}
Fix a real number $\epsilon>0$ and a positive integer $n$. There exists a constant $C_{\epsilon,n}$ depending only on $\epsilon$ and
$n$ such that if $d>C_{\epsilon,n}$ and $H\subset G=(\mathbb{Z}/d\mathbb{Z})^\times$
is a subgroup of index $\le n$, then for all $a\in G$,
$$\left|\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}
\right\rangle-\frac12\right|< \epsilon.$$
\end{prop}

\begin{proof}
We have
\begin{align*}
A:=\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}\right\rangle
&=\frac{1}{|H|}\sum_{\substack{s=1\\(s,d)=1}}^{d-1}\frac sd\frac1{[G:H]}
\sum_{\chi\in\widehat{G/H}}\chi(sa^{-1})\\
&=\frac12 +\frac1{d\phi(d)}\sum_{1\neq\chi\in\widehat{G/H}}\chi(a^{-1})
\sum_{\substack{s=1\\(s,d)=1}}^{d-1}\chi(s)s
\end{align*}
where $|H|$ denotes the order of $H$, $\widehat{G/H}$ denotes the group of characters of $G/H$ (which we view as characters of $G$
trivial on $H$), and $\phi(d)=|G|$ is Euler's function. Partial summation and the Polya-Vinogradov inequality \cite{Davenport}*{\S23}
show that there is an absolute constant $C$ such that the inner sum above has absolute value $<Cd^{3/2}\log d$ and so the quantity $A$ to be estimated satisfies
$$\left|A-\frac12\right|\le\frac{Cnd^{1/2}\log d}{\phi(d)}.$$
Well-known estimates for $\phi(d)$ \cite{HardyWright}*{Thm~327} say that for all $\delta>0$, $\phi(d)/d^{1-\delta}\to\infty$ as
$d\to\infty$ so there is a constant $C_{\epsilon,n}$ depending only on $n$ and $\epsilon $ such that
$$\frac{Cnd^{1/2}\log d}{\phi(d)}< \epsilon$$
whenever $d>C_{\epsilon,n}$. This completes the proof of the proposition.
\end{proof}

\begin{cor}\label{cor:alld}
Given $n$ there exists a constant $\delta_n>0$ depending only on $n$
such that for any $d\ge2$, any $0\neq a\in\mathbb{Z}/d\mathbb{Z}$, and any subgroup
$H\subset G=(\mathbb{Z}/d\mathbb{Z})^\times$ of index $\le n$,
$$\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}\right\rangle>\delta_n.$$
\end{cor}

\begin{proof}
For $0\neq a\in(\mathbb{Z}/d\mathbb{Z})$, set $e=\gcd(a,d)$, $d'=d/e$,
$G'=(\mathbb{Z}/d'\mathbb{Z})^\times$, $a'=a/e$, and $H'=\im(H\to G')$. Then the index of $H'$ in $G'$ is $\le n$ and we have
$$A:=\frac{1}{|H|}\sum_{t\in H}\left\langle\frac{ta}{d}\right\rangle=
\frac{1}{|H'|}\sum_{t\in H'}\left\langle\frac{ta'}{d'}\right\rangle$$
and so we may assume that $\gcd(a,d)=1$, i.e., that $a\in G$.

Given $n$, let $C_{1/4,n}$ be the constant furnished by the proposition for $n$ and $\epsilon =1/4$. If $d>C_{1/4,n}$ then by the proposition, $A>1/4$. On the other hand, there are only finitely many
$d\le C_{1/4,n}$ and for each $d$, only finitely many subgroups
$H\subset(\mathbb{Z}/d\mathbb{Z})^\times$ of index $\le n$. Since $A>0$ for each of these finitely many possibilities, there is a $\delta_n>0$ such that
$A>\delta_n$ for all $d$ and $a$.
\end{proof}

\subsection{Proof of Theorem~\ref{thm:GaussJacobiBounds} (1)}
Given $\epsilon$ and $n$, suppose that $d$, $q=p^f\equiv1\pmod d$, and
$a\in G=(\mathbb{Z}/d\mathbb{Z})^\times\cong\gal(\mathbb{Q}(\mu_{pd})/\mathbb{Q}(\mu_p))$, are such that $G_q(a)\in\mathbb{Q}(\mu_{pd})$ has degree $\le n$ over $\mathbb{Q}(\mu_p)$. Let
$H\subset G$ be the subgroup of $G$ fixing $\mathbb{Q}(\mu_p,G_q(a))$, so that
$H$ has index $\le n$ in $G$. If $\wp$ is the prime of $\mathbb{Q}(\mu_{pd})$
under $\mathfrak{p}$, then for every $t\in H$, we have
$\ord_{\wp^t}(G_q(a))=\ord_{\wp}(G_q(a))$. Therefore,
\begin{align*}
\frac{\ord_{\wp}G_q(a)}{(p-1)f}
&=\frac1{|H|}\sum_{t\in H} \frac{\ord_{\wp^t}G_q(a)}{(p-1)f}\\
&=\frac1{|H|f}\sum_{t\in H}\sum_{j=0}^{f-1}
\left\langle \frac{p^jta}{d}\right\rangle
\end{align*}
where the second equality comes from Stickelberger's theorem. Let
$P$ be the subgroup of $(\mathbb{Z}/d\mathbb{Z})^\times$ generated by $p$ and $HP$
the subgroup generated by $H$ and $P$. The last displayed sum is then equal to
$$\frac{1}{|HP|}\sum_{t\in HP}\left\langle\frac{ta}{d}\right\rangle.$$
Since $H$ has index $\le n$ in $G$, the same is true of $HP$ and so Proposition~\ref{prop:BasicEstimate} shows that if $d>C_{\epsilon,n}$
then
$$\left|\frac{\ord_{\wp}G_q(a)}{(p-1)f}-\frac12\right|<\epsilon$$
as was to be shown.
\qed

\subsection{Proof of Theorem~\ref{thm:GaussJacobiBounds} (2)}
Given ${\mathbf{a}}\in A'_{d,w}$, set $d_i=d/\gcd(d,a_i)$. The following lemma tells us that if $d$ is large then at least two of the $d_i$ are also large.

\begin{subsublemma}
With notation as above, there exists an absolute constant $C$ such that at least two of the $d_i$ are $\ge C\log d$.
\end{subsublemma}

\begin{proof}
If $\ell$ divides $d$ then from the definitions, there are at least two $i$'s such that $\ell$ does not divide $a_i$. Therefore the largest prime power dividing $d$ also divides at least two of the
$d_i$.

To finish we note that Chebyschev's theorem
\cite{HardyWright}*{Thm.~7} implies that the largest prime power dividing $d$ is $\ge C\log d$ for some absolute constant $C$.
Indeed, let $M$ be a positive number, let $p_1,\dots,p_{\pi(M)}$ be the primes less than $M$, and let $p_i^{e_i}$ be the largest power of
$p_i$ less than $M$. If $N=\prod_{i=1}^{\pi(M)}p_i^{e_i}$ then
$$\log N=\sum_{i=1}^{\pi(M)}e_i\log p_i\le\pi(M)\log M\le C'M$$
by Chebyschev. This shows that if $N$ is a product of prime powers less than $M$, then $N\le e^{C'M}$. Therefore the largest prime power dividing $N$ is at least $C\log N$ where $C=1/C'$.
\end{proof}

Now fix $n$ and consider those $q\equiv1\pmod d$ and ${\mathbf{a}}$ such that
$J_q({\mathbf{a}})$ has degree $\le n$ over $\mathbb{Q}$. Let $H\subset G=(\mathbb{Z}/d\mathbb{Z})^\times\cong\gal(\mathbb{Q}(\mu_d)/\mathbb{Q})$ be the subgroup fixing
$\mathbb{Q}(J_q({\mathbf{a}}))$ so that $H$ has index $\le n$ in $G$. Then we have
\begin{align*}
A(q,{\mathbf{a}}):=\frac{\ord_{\wp}J_q({\mathbf{a}})}{f}
&=\frac{1}{|H|}\sum_{t\in H}\frac{\ord_{\wp^t}(J_q({\mathbf{a}}))}{f}\\
&=\frac{1}{|H|}\sum_{t\in H}\frac 1f \left(\sum_{i=0}^{w+1}
\sum_{j=0}^{f-1}\left\langle\frac{tp^ja_i}{d}\right\rangle-f\right)\\
&=\left(\sum_{i=0}^{w+1}\frac{1}{|HP|}
\sum_{t\in HP}\left\langle\frac{ta_i}{d}\right\rangle\right)-1
\end{align*}
where as before $P$ is the subgroup of $(\mathbb{Z}/d\mathbb{Z})^\times$ generated by
$p$ and $HP$ is the subgroup generated by $H$ and $P$. Reindexing
${\mathbf{a}}$, we may assume that $d_0$ and $d_1$ are $\ge C\log d$. Since $H$
has index $\le n$, so does $HP$ and so we get bounds on the inner sums in the last displayed equation. More precisely, by Corollary~\ref{cor:alld}, the inner sum is $>\delta_n$ for
$i=2,\dots,w+1$, and by Proposition~\ref{prop:BasicEstimate}, if $d_0$
and $d_1$ are sufficiently large (so that $C\log d>C_{\epsilon,n}$),
the $i=0$ and $i=1$ terms are $>1/2-\epsilon$. Applying this with
$\epsilon=\delta_n/4\le w\delta_n/4$, we see that for sufficiently large $d$, $A(q,{\mathbf{a}})\ge(w-1/2)\delta_n\ge\delta_n/2$. This completes the proof of part (2) of the theorem. \qed

\section{Fermat Jacobians}

\subsection{}
Let $k$ be an arbitrary field with separable closure $\overline k$.
For each positive integer $d$ not divisible by the characteristic of
$k$ we consider the Fermat curve $F_d$ of degree $d$ over $k$ (the zero locus of $\sum_{i=0}^2x_i^d$ in $\mathbb{P}^2$) and its Jacobian $J_d$.
If $A$ is an abelian variety over $k$, we say that ``$A$ appears in
$J_d$'' if there is a homomorphism of abelian varieties $A\to J_d$
with finite kernel. We say ``$A$ appears in $J_d$ with multiplicity
$m$'' if $m$ is the largest integer such that $A^m$ appears in $J_d$.
The multiplicity with which $A$ appears in $J_d$ obviously depends only on the $k$-isogeny class of $A$.

The following two theorems are the main results of this section.

\begin{thm}\label{thm:Jd0}
Suppose that $k$ is a field of characteristic zero. Then for every positive integer $g$, only finitely many $k$-isogeny classes of abelian varieties of dimension $\le g$ appear in $J_d$ as $d$ varies through all positive integers. If $A$ is an abelian variety over
$k$, then the multiplicity with which $A$ appears in $J_d$ is bounded by a constant depending only on the dimension of $A$.
\end{thm}

If $k$ has characteristic $p$ and $A$ is an abelian variety over $k$,
the $p$-rank of $A$ is by definition the dimension over ${\mathbb{F}_p}$ of the group of $\overline k$-rational $p$-torsion points on $A$. It is known that the $p$-rank lies in the interval $[0,\dim A]$ and that it is invariant under isogeny.

\begin{thm}\label{thm:Jdp}
Suppose that $k$ is a field of characteristic $p>0$. Then for every positive integer $g$, only finitely many $k$-isogeny classes of abelian varieties with positive $p$-rank and dimension $\le g$
appear in $J_d$ as $d$ varies through all positive integers prime to
$p$. If $A$ is an abelian variety over $k$ with positive $p$-rank,
then the multiplicity with which $A$ appears in $J_d$ is bounded by a constant depending only on the dimension of $A$.
\end{thm}

\begin{rems}
\hfill\break
\begin{enumerate}
\item \vskip-12pt We repeat that the constants in the theorems depend only on the dimension $g$. In particular, they are independent of the characteristic of $k$. As will be clear from the proof, they are also effectively computable.
\item Theorem~\ref{thm:Jd0} is already known in a more precise quantitative form by results of Aoki \cite{Aoki}, building on work of Koblitz, Rohrlich, and Shioda. Theorem~\ref{thm:Jdp} may be known to experts but to my knowledge is not in the literature. We will give a very simple proof of Theorem~\ref{thm:Jdp} for $k$
finite and use this to deduce the general case and Theorem~\ref{thm:Jd0}.
\item It is proven in \cite{TS}, and by a different method in
\cite{UlmerR2}, that over a field of characteristic $p$, the multiplicity with which a supersingular elliptic curve appears in
$J_d$ is unbounded as $d$ varies. Thus the last part of Theorem~\ref{thm:Jdp} would be false without the hypothesis of positive $p$-rank. It is not clear what to expect for abelian varieties with $p$-rank zero which are not $\overline k$-isogenous to a product of supersingular elliptic curves.
\end{enumerate}
\end{rems}

The proofs of the theorems will be given in the rest of this section.

\subsection{}\label{ss:oldnew}
If $d'<d$ is a divisor of $d$, then there is a canonical surjective morphism $F_d\to F_{d'}$ ($x_i\mapsto x_i^{d/d'}$) which (because
$F_d\to F_{d'}$ is totally ramified at some place) induces an injection of Jacobians $J_{d'}\hookrightarrow J_d$. We define the {\it old part\/} $J_d^\text{old}$ to be the abelian subvariety of $J_d$
generated by the images of the morphisms $J_{d'}\hookrightarrow J_d$ as $d'$
varies through proper divisors of $d$ and we define the {\it new part\/} $J_d^\text{new}$ of $J_d$ to be the abelian variety over $k$
(well-defined only up to $k$-isogeny) such that $J_d$ is isogenous to
$J_d^\text{new}\times J_d^\text{old}$. It is not hard to check, for example by using the zeta function calculation mentioned in
\ref{ss:FermatZetas} below, that $J_d$ is isogenous to
$\prod_{d'|d}J_{d'}^\text{new}$.

Theorem~\ref{thm:Jd0} therefore follows from the statement that there is a constant $C_g$ depending only on $g$ such that no abelian variety
$A$ of dimension $\le g$ appears in $J_d^\text{new}$ for any $d>C_g$.
Theorem~\ref{thm:Jdp} follows from the same statement with the additional hypotheses that $A$ has positive $p$-rank and $d$ is not divisible by $p$.

\subsection{}\label{ss:algfields}
Given a field $k$, let $\mathbb{F}$ be its prime field and $k_0$ be the algebraic closure of $\mathbb{F}$ in $k$. Then $k_0$ is a perfect field and so the extension $k/k_0$ is regular. The Fermat Jacobian $J_d$ and its new part $J_d^\text{new}$ are defined over $\mathbb{F}$ and so if $A$ is an abelian variety over $k$ which appears in
$J_d^\text{new}\times_{\mathbb{F}} k$ then there is an abelian variety $A_0$
defined over $k_0$ which appears in $J_d^\text{new}\times_{\mathbb{F}} k_0$
and with $A_0\times_{k_0} k\cong A$. (This is an old result of Chow which has been given a detailed modern treatment by Conrad, see
\cite{Conrad}*{3.21}.) Moreover, the abelian variety $A_0$ and the morphism $A_0\to J_d^\text{new}$ are both defined over some finite extension of $\mathbb{F}$. Thus it will suffice to prove the existence of the constants $C_g$ mentioned at the end of Subsection~\ref{ss:oldnew}
(depending only on $g$, not on $k$) for the cases when $k$ is a number field or a finite field.

\subsection{}\label{ss:HondaTate}
Let $k$ be ${\mathbb{F}_q}$, the subfield of ${\overline{\mathbb{F}}_p}=\mathcal{O}_{{\overline{\mathbb{Q}}}}/\mathfrak{p}$ with $q$
elements. A {\it Weil $q$-integer of weight 1\/} is an algebraic integer $\alpha$ whose absolute value in every complex embedding is
$q^{1/2}$. For the rest of this section we will call these simply
{\it Weil numbers\/}.

Honda-Tate theory \cite{TateHT} says that ${\mathbb{F}_q}$-isogeny classes of
${\mathbb{F}_q}$-simple abelian varieties are in bijection with $\gal({\overline{\mathbb{Q}}}/\mathbb{Q})$
orbits of Weil numbers. If $A$ corresponds to $\alpha$, then
$E=\en_{{\mathbb{F}_q}}(A)\otimes\mathbb{Q}$ is a central simple algebra over
$\mathbb{Q}(\alpha)$ whose invariants in the Brauer group of $\mathbb{Q}(\alpha)$ can be calculated in terms of the decomposition of $p$ in $\mathbb{Q}(\alpha)$.
The dimension of $A$ is $(1/2)[E:\mathbb{Q}(\alpha)]^{1/2}[\mathbb{Q}(\alpha):\mathbb{Q}]$ and the eigenvalues of Frobenius on $H^1(A\times{\overline{\mathbb{F}}_q},{\mathbb{Q}_\ell})$ are the conjugates of $\alpha$, each appearing with multiplicity
$[E:\mathbb{Q}(\alpha)]^{1/2}$. The $p$-rank of $A$ is equal to the number of eigenvalues of Frobenius which are units at $\mathfrak{p}$ and so $A$ has positive $p$-rank if and only if some conjugate of $\alpha$ is a unit at $\mathfrak{p}$.

If $C$ is a curve of genus $g$ over ${\mathbb{F}_q}$ and the $Z$-function of $C$
is
$$\frac{\prod_{i=1}^{2g}(1-\alpha_iT)}{(1-T)(1-qT)}$$
then the Weil numbers of the ${\mathbb{F}_q}$-simple factors of the Jacobian $J$
of $C$ are precisely the $\alpha_i$. The multiplicity of $\alpha_i$
in the numerator is the multiplicity of the corresponding $A$ in $J$
up to ${\mathbb{F}_q}$-isogeny times $[E_A:\mathbb{Q}(\alpha)]^{1/2}$.

\subsection{}\label{ss:FermatZetas}
Given a positive integer $d$ and a prime power $q$ such that $q\equiv 1\pmod{d}$ we consider the Fermat curve $F_d$ over ${\mathbb{F}_q}$. By a theorem of Weil \cite{WeilNS}, the $Z$-function of $F_d$ over ${\mathbb{F}_q}$ is
$$\frac{\prod_{{\mathbf{a}}\in A_{d,1}}(1-J_q({\mathbf{a}})T)}{(1-T)(1-qT)}$$
where $A_{d,1}$ was defined in Subsection~\ref{ss:JacobiSums}.

It is clear from Weil's computation of the $Z$-function that the Weil numbers of $J_d^\text{new}$ are precisely the $J_q({\mathbf{a}})$ as ${\mathbf{a}}$ runs through
$$A'_{d,1}=\{{\mathbf{a}}=(a_0,a_1,a_2)\in A_{d,1}|\gcd(d,a_0,a_1,a_2)=1\}.$$

\subsection{The case of finite fields}
We assume $k={\mathbb{F}_q}$ and that $A$ is an abelian variety over $k$ which has positive $p$-rank and dimension $\le g$ and appears in
$J_d^\text{new}$. In this case, the Weil numbers of $A$ are among the Weil numbers of $J_d^\text{new}$. Extending $k$ if necessary, we may assume that $d|(q-1)$ and so the Weil numbers of $J_d^\text{new}$ are the Jacobi sums $J_q({\mathbf{a}})$ where ${\mathbf{a}}\in A'_{d,1}$. By the results recalled in Subsections~\ref{ss:HondaTate} and \ref{ss:FermatZetas} it follows that some $J_q({\mathbf{a}})$ has degree $\le 2g$ over $\mathbb{Q}$ and is a unit at the prime $\mathfrak{p}$. This implies that $d$ is $\le C_{2g}$ where
$C_{2g}$ is the constant appearing in Theorem~\ref{thm:GaussJacobiBounds}(2) for $n=2g$. Therefore no abelian variety of positive $p$-rank and dimension $\le g$ appears in
$J_d^\text{new}$ for large $d$ and this establishes Theorem~\ref{thm:Jdp} for finite fields. The argument in Subsection~\ref{ss:algfields} shows that the theorem also holds for arbitrary fields of positive characteristic.

\subsection{The case of number fields}
Suppose that $A$ is an abelian variety of dimension $\le g$ defined over a number field $k$. Suppose that $d$ is larger than the constant
$C(g)=C_{2g}$ appearing in Theorem~\ref{thm:Jdp} and that $A$ appears in $J_d^\text{new}$. Then for every prime $\wp$ of $k$ where $A$ has good reduction, by Theorem~\ref{thm:Jdp} the reduction $A\times
\mathbb{F}_\wp$ has $p$-rank 0. This would violate the following result,
which appears in \cite{Ogus}*{2.7.1}:

\begin{subsublemma} \textup{(}Katz\textup{)}
If $A$ is an abelian variety over a number field $k$, then for infinitely many primes of $k$, the reduction of $A$ has positive
$p$-rank.
\end{subsublemma}

For the convenience of the reader, we sketch the proof of the lemma.
Choose a prime $\ell$ larger than $2g$. Let $L$ be a finite extension of $k$ such that $\gal({\overline{\mathbb{Q}}}/L)$ acts trivially on the $\ell$-torsion of $A$. If $\wp$ is a prime of $L$ over the rational prime $p$ where the reduction of $A$ has $p$-rank zero, then the trace of the Frobenius at $\wp$ on $H^1(A\times\overline k,{\mathbb{Q}_\ell})$ is an integer
$\equiv0\pmod{p}$ and of absolute value $\le2g(\N\wp)^{1/2}$. If $\wp$ has absolute degree 1 over $\mathbb{Q}$ (i.e., $\N\wp=p$) and $\sqrt{p}>2g$, then we see that the trace must be zero. On the other hand, since $\gal({\overline{\mathbb{Q}}}/L)$
acts trivially on $\ell$-torsion, the trace must be
$\equiv2g\pmod\ell$. Since $\ell>2g$ this is impossible. The conclusion is that the reduction of $A$ at a prime of absolute degree one over a large $p$ must have positive $p$-rank. Such primes have density one in $L$ and the primes under them in $k$ are an infinite set satisfying the conclusion of the lemma.

We note that a stronger version of this result for abelian varieties over $\mathbb{Q}$ is proven in \cite{BayerGonzalez}*{Prop.~5.1}.

The lemma completes the proof of Theorem~\ref{thm:Jd0} for number fields and, as explained in Subsection~\ref{ss:algfields}, therefore also for arbitrary fields of characteristic zero.

\section{Isotrivial abelian varieties with bounded ranks in
$\hat{\mathbb{Z}}$ or ${\hat{\mathbb{Z}}^{(p)}}$-towers}

\subsection{}
In the rest of the paper we will give examples of abelian varieties with bounded ranks in towers of function fields over various fields
$k$. Before doing so, let us dispense with a trivial situation: if
$A$ is an abelian variety over $k(t)$ with good reduction away from
$0$ and $\infty$ and at worst tame ramification at $0$ and $\infty$,
then for any $d$ prime to the characteristic of $k$, the degree of the conductor of $A$ over $k(t^{1/d})$ is bounded independently of $d$.
Geometric rank bounds then show that the rank of $A$ over $k(t^{1/d})$
is also bounded independently of $d$. Therefore it is only interesting to consider situations where the degree of the conductor grows in the tower under consideration. All our examples below are of this type.

\subsection{}
We review some well-known facts about constant and isotrivial abelian varieties. Let $k$ be any field, let $L$ be the function field of a geometrically irreducible curve $\mathcal{C}$ smooth and proper over $\spec k$, and let $J$ be the Jacobian of $\mathcal{C}$. Let $A_0$ be an abelian variety over $k$ and let $A=A_0\times_kL$. Then it is clear that
$A(L)$, the group of $L$-rational points of $A$, is canonically isomorphic to $\mor_k(\mathcal{C},A_0)$, the group of $k$-scheme morphisms from $\mathcal{C}$ to $A_0$. Moreover, we have an exact sequence
\begin{equation*}
0\to A_0(k)\to\mor_k(\mathcal{C},A_0)\to\Hom_{k\text{-av}}(J,A_0)
\end{equation*}
where a $k$ point of $A_0$ is sent to the constant map with that value and a morphism from $\mathcal{C}$ to $A_0$ is sent to the homomorphism of abelian varieties induced by Albanese functoriality. If $\mathcal{C}$ has a $k$-rational divisor of degree 1 (for example if $k$ is finite) then the last map above is surjective. If $k$ is finitely generated over its prime field, then by the Lang-N\'eron theorem, $A_0(k)$ is finitely generated. (See \cite{Conrad} for a modern treatment of the Lang-N\'eron theorem.) For any $k$, $\Hom_{k\text{-av}}(J,A_0)$ is finitely generated and torsion free. If $A_0$ is $k$-simple, then the rank of $\Hom_{k\text{-av}}(J,A_0)$ is equal to the rank of the endomorphism ring of $A_0$ times the multiplicity with which $A_0$
appears in $J$ up to $k$-isogeny.

\subsection{}
Continuing with the notation of the last subsection, suppose that
$\mathcal{C}$ is hyperelliptic, i.e., we are given a degree 2 morphism
$\mathcal{C}\to\mathbb{P}^1$. Let $A'$ be the twist of $A=A_0\times_k k(t)$ by the quadratic extension $L/k(t)$. Since there are no non-constant morphisms from $\mathbb{P}^1$ to an abelian variety, we have
$A(k(t))=A_0(k)$. Since
\begin{equation*}
A(L)\otimes\mathbb{Q}\cong \left(A(k(t))\otimes\mathbb{Q}\right) \bigoplus
\left(A'(k(t))\otimes\mathbb{Q}\right)
\end{equation*}
we conclude that $A'(k(t))$ has finite rank, bounded above by
\begin{equation}\label{eq:isotrank}
\dim_\mathbb{Q} A'(k(t))\otimes\mathbb{Q}\le\dim_\mathbb{Q} \Hom_{k\text{-av}}(J,A_0)\otimes\mathbb{Q}
=\rk_\mathbb{Z} \Hom_{k\text{-av}}(J,A_0)
\end{equation}
with equality when $\mathcal{C}$ has a $k$-rational divisor of degree 1.

\subsection{}
We can now apply the rank formula above and our results about Fermat Jacobians to give examples of bounded ranks in towers. Let $K_1=k(t)$
and for every positive integer $d$ not divisible by the characteristic of $k$, let $K_d=k(t^{1/d})$. If the characteristic of $k$ is not 2,
let $L_1=k(u)$ with $u^2=t-1$; if the characteristic of $k$ is 2, let
$L_1=k(u)$ with $u^2+u=t$. For all $d$ prime to the characteristic of
$k$, let $L_d=L_1K_d=k(t^{1/d},u)$. Note that $L_d$ is the function field of a hyperelliptic curve $C_d$ over $k$. Using ideas analogous to \cite{UlmerR2}*{\S6}, one checks easily that there is a totally ramified, surjective morphism $F_{n}\to C_d$ from a Fermat curve;
here $n=2d$ if the characteristic of $k$ is not 2 and $n=d$ if the characteristic of $k$ is 2. It follows that the Jacobian of $C_d$ is an isogeny factor of $J_{n}$. Applying the rank formula
\eqref{eq:isotrank} and Theorems~\ref{thm:Jd0} and~\ref{thm:Jdp} we have the following.

\begin{thm}\label{thm:isotrivial}
Let $k$ be a field and $A_0$ an abelian variety over $k$. If the characteristic of $k$ is $p>0$, assume that $A_0$ is isogenous to a product of $k$-simple abelian varieties each with positive $p$-rank.
Let $A=A_0\times_k k(t)$ and let $A'$ be the twist of $A$ by the quadratic extension $k(u)/k(t)$ where $u$ satisfies $u^2=t-1$ if the characteristic of $k$ is not 2 and $u^2+u=t$ if the characteristic of $k$ is 2. Then the rank of the Mordell-Weil group $A'(k(t^{1/d}))$ is bounded as $d$ varies through all positive integers relatively prime to the characteristic of $k$.
\end{thm}

\section{Non-isotrivial elliptic curves with bounded ranks in $\mathbb{Z}_\ell$-towers}

\subsection{}
For examples of non-isotrivial elliptic curves with bounded ranks in
$\mathbb{Z}_\ell$ extensions, we consider the curve $E$ discussed in
\cite{UlmerR1} with affine equation
$$y^2+xy=x^3-t$$
over ${\mathbb{F}_p}(t)$.

\bigskip
\begin{thm}~\label{thm:nonisol}
Given $p$ let $S$ be the set of primes $\ell>3$ such that
$p\equiv1\pmod\ell$. If $d$ is a product of powers of primes from
$S$, then the rank of $E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is zero.
\end{thm}

The proof of the theorem will be given in the rest of this section.

\subsection{}
We use the notation of Subsection~\ref{ss:JacobiSums} on Jacobi sums.
Given $p$, $d$ prime to $p$, and ${\mathbf{a}}=(a_0,\dots,a_3)\in A_{d,2}$, we say that ${\mathbf{a}}$ is ``supersingular'' (some authors would say ``pure'')
if for one (and thus every) $q=p^f\equiv1\pmod{d}$ and all
$s\in(\mathbb{Z}/d\mathbb{Z})^\times$ we have
$$\sum_{i=0}^3\sum_{j=0}^{f-1}\left\langle\frac{sp^ja_i}{d}\right\rangle=2f.$$
If ${\mathbf{a}}$ is supersingular, then for every prime $\wp$ of $\mathbb{Q}(\mu_d)$
over $p$, the valuation $\ord_{\wp}J_q({\mathbf{a}})$ is $f$ and this implies that $J_q({\mathbf{a}})$ is a root of unity times $q$; this is the motivation for the terminology ``supersingular''.

\subsection{}
By \cite{UlmerR1}*{6.4 and 7.7}, if $(d,6p)=1$, then the rank of
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is equal to the number of elements
$t\in\mathbb{Z}/d\mathbb{Z}\setminus\{0\}$ such that ${\mathbf{a}}=(t,-6t,2t,3t)$ is supersingular. We are going to show that for suitable $d$ there are no supersingular ${\mathbf{a}}$ of this form by using a descending induction based on the following elementary identity. Suppose that
$a\in\mathbb{Z}/d\mathbb{Z}$, $\ell$ is a prime such that $\ell^2|d$ and $\ell\mathrel{\mathchoice{\not|}{\not|}{\kern-.2em\not\kern.2em|}{\kern-.2em\not\kern.2em|}} a$. Let $H$ be the cyclic subgroup of $(\mathbb{Z}/d\mathbb{Z})^\times$ generated by
$1+d/\ell$. Then we have
$$\sum_{s\in H}\left\langle\frac{sa}{d}\right\rangle
=\left\langle\frac{a}{d/\ell}\right\rangle+\frac{\ell-1}{2}.$$

It follows that if ${\mathbf{a}}=(a_0,\dots,a_3)\in A_{d,2}$, $\ell^2|d$,
$\ell\mathrel{\mathchoice{\not|}{\not|}{\kern-.2em\not\kern.2em|}{\kern-.2em\not\kern.2em|}} a_i$ for all $i$, and if ${\mathbf{a}}$ is supersingular, then its image in $A_{d/\ell,2}$ is also supersingular. Indeed, we have
$$2f\ell=\sum_{s\in H}\sum_{i=0}^3
\sum_{j=0}^{f-1}\left\langle\frac{sp^ja_i}{d}\right\rangle
=\sum_{i=0}^3\sum_{j=0}^{f-1}
\left\langle\frac{p^ja_i}{d/\ell}\right\rangle+2f(\ell-1)$$
and similarly if ${\mathbf{a}}$ is replaced by $t{\mathbf{a}}$ with $t\in(\mathbb{Z}/d\mathbb{Z})^\times$.

\subsection{}
We can now prove the theorem. Suppose we are given $p$ and $d$, where $d$ is a product of powers of primes in $S$. If the rank of $E({\overline{\mathbb{F}}_p}(t^{1/d}))$ were positive, then we would have a $t\in\mathbb{Z}/d\mathbb{Z}\setminus\{0\}$ such that
${\mathbf{a}}=(t,-6t,2t,3t)$ is supersingular. Without loss of generality we may assume that $t\in(\mathbb{Z}/d\mathbb{Z})^\times$ and then that ${\mathbf{a}}=(1,-6,2,3)$.
Applying the observation of the previous subsection repeatedly, we may
``reduce the level'' and find a $d'$ which is a product of distinct primes from $S$ such that $(1,-6,2,3)\in A_{d',2}$ is supersingular.
But for such a $d'$ we have $f=1$, i.e., $p\equiv1\pmod{d'}$ and with this one easily checks that
$$\sum_{i=0}^3\sum_{j=0}^{f-1}\left\langle\frac{p^ja'_i}{d'}\right\rangle=
\sum_{i=0}^3\left\langle\frac{a'_i}{d'}\right\rangle=1\neq 2f$$
and so we arrive at a contradiction to the assumption that
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ has positive rank. This completes the proof of the Theorem.

\subsection{}
The theorem shows that for any prime $p$ such that $p-1$ is not a power of $2$ times a power of $3$, there is an elliptic curve over
${\mathbb{F}_p}(t)$ with bounded rank in a ${\mathbb{Z}_\ell}$ tower ${\overline{\mathbb{F}}_p}(t^{1/\ell^n})$ for suitable $\ell$. We will prove a stronger result for certain small
$p$ not of this type (namely $p=2, 3, 5, 7$) in the next section and so it seems likely that this kind of statement holds for all $p$.

In the same direction, it seems quite likely that a more refined analysis would show that given $p$, and for $E$ as above, the rank of
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is bounded as $d$ runs through all integers which are products of powers of primes $\ell$ such that no power of $p$ is congruent to $-1$ modulo $\ell$.

Generalizing in another direction, a geometric analysis as in
\cite{UlmerR1}*{\S5} applied to the curves in \cite{UlmerR2}*{\S7}
might allow one to prove a version of Theorem~\ref{thm:nonisol} for higher dimensional abelian varieties.

Finally, we note that it is not hard to deduce from Theorem~\ref{thm:nonisol} that the curve defined over $\mathbb{Q}(t)$ by the equation $y^2+xy=x^3-t$ has bounded rank over ${\overline{\mathbb{Q}}}(t^{1/d})$ as $d$
ranges over all positive integers. We omit the details since similar results were shown by Shioda \cite{Shioda}*{Cor.~9} using closely related techniques.

\section{Non-isotrivial elliptic curves with bounded ranks in ${\hat{\mathbb{Z}}^{(p)}}$-towers}
\subsection{}
We will use completely different techniques, unrelated to Fermat varieties, to give a few examples of non-isotrivial elliptic curves with bounded ranks in towers ${\overline{\mathbb{F}}_p}(t^{1/d})$ as $d$ ranges over all integers prime to $p$.

\begin{thm}\label{thm:nonisod}
If $p\in\{2,3,5,7,11\}$ then there exists an elliptic curve $E$ over
${\mathbb{F}_p}(t)$ with $j(E)\not\in{\mathbb{F}_p}$ such that the rank of
$E({\overline{\mathbb{F}}_p}(t^{1/d}))$ is zero for all positive integers $d$ prime to
$p$.
\end{thm}

The proof of the theorem, which uses ideas from \cite{Ulmerpd}, will be given in the rest of this section.

\subsection{}
Given an elliptic curve $E$ over ${\mathbb{F}_p}(t)$ with $j(E)\not\in{\mathbb{F}_p}$,
choose a non-zero invariant differential $\omega$ on $E$ and let
$\Delta=\Delta(E,\omega)$ and $A=A(E,\omega)$ be the discriminant and Hasse invariant of $E$; the definition of the latter is reviewed in
\cite{Ulmerpd}*{\S2}. Our assumptions imply that $\Delta$ and $A$ are non-zero elements of ${\mathbb{F}_p}(t)$.

Consider the following conditions on $E$:
\begin{itemize}
\item $E$ has good or multiplicative reduction at $t=0$ and $t=\infty$.
\item At every finite non-zero place of ${\mathbb{F}_p}(t)$, $E$ obtains good reduction over a tamely ramified extension.
\item At every finite non-zero place $v$ of ${\mathbb{F}_p}(t)$, we have
$$\frac{\ord_v(A)}{p-1}-\frac{\ord_v(\Delta)}{12} <\frac{1}{p}.$$
\end{itemize}
Note that the third condition is automatic at places where $E$ has good ordinary reduction, in particular at places where $A$ and
$\Delta$ are units. Note also that if $E$ satisfies these conditions then it continues to satisfy them over the extensions ${\mathbb{F}_q}(t^{1/d})$
for any power $q$ of $p$ and any $d$ prime to $p$.

\subsection{}
It follows from \cite{Ulmerpd}*{Section~3 and the first sentence of Section~6} that an elliptic curve over ${\mathbb{F}_p}(t)$ satisfying the conditions of the previous subsection has rank 0 or 1 over any extension $K={\mathbb{F}_q}(t^{1/d})$. To see this, we consider the Frobenius and Verschiebung isogenies
\begin{equation*}
\xymatrix{E\ar[r]^{Fr}&E^{(p)}\ar[r]^{V}&E}
\end{equation*}
whose composition is multiplication by $p$. Section~3 of
\cite{Ulmerpd} computes the Selmer groups for $Fr$ and $V$ in terms of the reduction types of $E$, $A$, and $\Delta$. Under the conditions of the previous subsection, the results are that $\sel(K,V)=0$ and
$\sel(K,Fr)$ is zero if $E$ has good reduction at 0 or $\infty$ and has order $p$ if $E$ has multiplicative reduction at both 0 and
$\infty$.

We have an exact sequence
$$E^{(p)}(K)\to\sel(K,Fr)\to\sel(K,p)\to\sel(K,V)$$
and so the Selmer group for $p$ is either trivial or of order $p$. In the examples we give below, when $\sel(K,Fr)$ is non-trivial, there is a point of order $p$ in $E^{(p)}(K)$ mapping to a generator of
$\sel(K,Fr)$ and so $\sel(K,p)=0$ and $E(K)$ has rank 0.

\subsection{}
We now give explicit examples of elliptic curves satisfying our conditions.

Suppose $p=2$ and let $E$ be defined by
$$y^2+(t-1)xy+(t-1)^2y=x^3.$$
If $\omega=dx/((t-1)x+(t-1)^2)$, then $A=(t-1)$, $\Delta=t(t-1)^8$,
and $j=(t-1)^4/t$. Standard methods show that $E$ has good, ordinary reduction away from 0, 1, and $\infty$; that $E$ has multiplicative reduction at 0 and $\infty$; and that at $t=1$, $E$ obtains good reduction over an extension with ramification index 3 and the inequality involving $A$ and $\Delta$ is satisfied. The point
$(x,y)=((t-1)^2,(t-1)^3)$ on $E^{(2)}$ has order 2 and maps non-trivially to $\sel(K,Fr)$ and so $\sel(K,2)=0$ for all
$K={\mathbb{F}_q}(t^{1/d})$.

If $p=3$, let $E$ be defined by
$$y^2=x^3+(t-1)^2x^2+t(t-1)^3x.$$
If $\omega=dx/2y$, then $A=(t-1)^2$, $\Delta=-t^2(t-1)^9$, and
$j=-(t-1)^3/t^2$. Standard methods show that $E$ has good, ordinary reduction away from 0, 1, and $\infty$; that $E$ has multiplicative reduction at 0 and $\infty$; and that at $t=1$, $E$ obtains good reduction over an extension with ramification index 4 and the inequality involving $A$ and $\Delta$ is satisfied. The points
$(x,y)=(t^2(t-1)^4,\pm t^2(t-1)^6)$ on $E^{(3)}$ have order 3 and map non-trivially to $\sel(K,Fr)$ and so $\sel(K,3)=0$ for all
$K={\mathbb{F}_q}(t^{1/d})$.

If $p=5$, let $E$ be defined by
$$y^2=x^3+3(t-1)^4x+(t+1)(t-1)^5.$$
If $\omega=dx/2y$, then $A=(t-1)^4$, $\Delta=2t(t-1)^{10}$, and
$j=(t-1)^2/2t$. Standard methods show that $E$ has good, ordinary reduction away from 0, 1, and $\infty$; that $E$ has multiplicative reduction at 0 and $\infty$; and that at $t=1$, $E$ obtains good reduction over an extension with ramification index 6 and the inequality involving $A$ and $\Delta$ is satisfied. The points with
$x$ coordinate $2(t-1)^8(t^2\pm2t-1)$ on $E^{(5)}$ have order 5 and map non-trivially to $\sel(K,Fr)$ and so $\sel(K,5)=0$ for all
$K={\mathbb{F}_q}(t^{1/d})$.

If $p=7$, let $E$ be defined by
$$y^2=x^3+(t-1)(t+1)^3x+5(t-1)(t+1)^5.$$
If $\omega=dx/2y$, then $A=(t-1)(t+1)^5$, $\Delta=2(t-1)^2(t+1)^9$,
and $j=4(t-1)$. Standard methods show that $E$ has good, ordinary reduction away from $\pm1$ and $\infty$; that $E$ has multiplicative reduction at $\infty$; that at $t=1$, $E$ obtains good reduction over an extension with ramification index 6 and the inequality involving
$A$ and $\Delta$ is satisfied; and that at $t=-1$, $E$ obtains good reduction over an extension with ramification index 4 and the inequality involving $A$ and $\Delta$ is satisfied. It follows that
$\sel(K,7)=0$ for all $K={\mathbb{F}_q}(t^{1/d})$.

If $p=11$, let $E$ be defined by
$$y^2=x^3+8(t-1)(t+1)^3x+2(t-1)(t+1)^5.$$
If $\omega=dx/2y$, then $A=(t-1)^2(t+1)^8$, $\Delta=9(t-1)^2(t+1)^9$,
and $j=5(t-1)$. Standard methods show that $E$ has good, ordinary reduction away from $\pm1$ and $\infty$; that $E$ has multiplicative reduction at $\infty$; that at $t=1$, $E$ obtains good reduction over an extension with ramification index 6 and the inequality involving
$A$ and $\Delta$ is satisfied; and that at $t=-1$, $E$ obtains good reduction over an extension with ramification index 4 and the inequality involving $A$ and $\Delta$ is satisfied. It follows that
$\sel(K,11)=0$ for all $K={\mathbb{F}_q}(t^{1/d})$.

\subsection{}
The theory of modular forms modulo $p$ suggests that the strategy employed in this section will not work for large $p$. Nevertheless, I conjecture that for all $p$
there are elliptic curves (indeed, absolutely simple abelian varieties of any dimension) over ${\mathbb{F}_p}(t)$ which have bounded Mordell-Weil ranks in the tower ${\mathbb{F}_q}(t^{1/d})$.

\begin{bibdiv}
\begin{biblist}
[\resetbiblist{Ulm05}]

\bib{Aoki}{article}{
author={Aoki, N.},
title={Simple factors of the Jacobian of a Fermat curve and the Picard number of a product of Fermat curves},
journal={Amer. J. Math.},
volume={113},
date={1991},
pages={779\ndash 833},
}

\bib{BayerGonzalez}{article}{
author={Bayer, P.},
author={Gonz{\'a}lez, J.},
title={On the Hasse-Witt invariants of modular curves},
journal={Experiment. Math.},
volume={6},
date={1997},
pages={57--76},
}

\bib{Conrad}{article}{
author={Conrad, B.},
title={Chow's $K/k$-image and $K/k$-trace, and the Lang-N\'eron theorem},
journal={Enseign. Math. (2)},
volume={52},
date={2006},
pages={37\ndash 108},
}

\bib{Davenport}{book}{
author={Davenport, H.},
title={Multiplicative number theory},
series={Graduate Texts in Mathematics},
volume={74},
publisher={Springer-Verlag},
place={New York},
date={2000},
pages={xiv+177},
}

\bib{Ellenberg}{article}{
author={Ellenberg, J. S.},
title={Selmer groups and Mordell-Weil groups of elliptic curves over towers of function fields},
date={2005},
status={Preprint, to appear in {\it Compositio Mathematica\/}},
label={Ellen}
}

\bib{Fastenberg}{article}{
author={Fastenberg, L. A.},
title={Mordell-Weil groups in procyclic extensions of a function field},
journal={Duke Math. J.},
volume={89},
date={1997},
pages={217\ndash 224},
}

\bib{HardyWright}{book}{
author={Hardy, G. H.},
author={Wright, E. M.},
title={An introduction to the theory of numbers},
publisher={The Clarendon Press Oxford University Press},
place={New York},
date={1979},
pages={xvi+426},
}

\bib{Ogus}{article}{
author={Ogus, A.},
title={Hodge cycles and crystalline cohomology},
pages={357--414},
book={
title={Hodge cycles, motives, and Shimura varieties},
author={Deligne, Pierre},
author={Milne, James S.},
author={Ogus, Arthur},
author={Shih, Kuang-yen},
series={Lecture Notes in Mathematics},
volume={900},
publisher={Springer-Verlag},
place={Berlin},
},
date={1982},
}

\bib{Shioda}{article}{
author={Shioda, T.},
title={An explicit algorithm for computing the Picard number of certain algebraic surfaces},
journal={Amer. J. Math.},
volume={108},
date={1986},
pages={415\ndash 432},
}

\bib{Silverman1}{article}{
author={Silverman, J. H.},
title={A bound for the Mordell-Weil rank of an elliptic surface after a cyclic base extension},
journal={J. Algebraic Geom.},
volume={9},
date={2000},
pages={301\ndash 308},
}

\bib{Silverman2}{article}{
author={Silverman, J. H.},
title={The rank of elliptic surfaces in unramified abelian towers},
journal={J. Reine Angew. Math.},
volume={577},
date={2004},
pages={153\ndash 169},
}

\bib{Stiller}{article}{
author={Stiller, P. F.},
title={The Picard numbers of elliptic surfaces with many symmetries},
journal={Pacific J. Math.},
volume={128},
date={1987},
pages={157\ndash 189},
}

\bib{TS}{article}{
author={Tate, J. T.},
author={Shafarevitch, I. R.},
title={The rank of elliptic curves},
language={Russian},
journal={Dokl. Akad. Nauk SSSR},
volume={175},
date={1967},
pages={770\ndash 773},
}

\bib{TateHT}{article}{
author={Tate, J. T.},
title={Classes d'isog\'enie des vari\'et\'es ab\'eliennes sur un corps fini (d'apr\`es T.~Honda)},
book={
title={S\'eminaire Bourbaki. Vol. 1968/69: Expos\'es 347--363},
series={Lecture Notes in Mathematics, Vol. 179},
publisher={Springer-Verlag},
place={Berlin},
date={1971},
},
pages={95--110},
label={Tat68}
}

\bib{Ulmerpd}{article}{
author={Ulmer, D.},
title={$p$-descent in characteristic $p$},
journal={Duke Math. J.},
volume={62},
date={1991},
pages={237\ndash 265},
}

\bib{UlmerR1}{article}{
author={Ulmer, D.},
title={Elliptic curves with large rank over function fields},
journal={Ann. of Math. (2)},
volume={155},
date={2002},
pages={295\ndash 315},
}

\bib{UlmerR2}{article}{
author={Ulmer, D.},
title={$L$-functions with large analytic rank and abelian varieties with large algebraic rank over function fields},
date={2005},
label={Ulmer},
status={Preprint, to appear in {\it Inventiones Mathematicae\/}},
}

\bib{Washington}{book}{
author={Washington, L. C.},
title={Introduction to cyclotomic fields},
series={Graduate Texts in Mathematics},
volume={83},
publisher={Springer-Verlag},
place={New York},
date={1997},
pages={xiv+487},
}

\bib{WeilNS}{article}{
author={Weil, A.},
title={Numbers of solutions of equations in finite fields},
journal={Bull. Amer. Math. Soc.},
volume={55},
date={1949},
pages={497\ndash 508},
}

\end{biblist}
\end{bibdiv}

\end{document}
