


% \subsection{Vector Field Representations}
% \subsection{Update Fields}
% \subsection{Commitment Fields}
\label{sec:vecrep}

% Suppose we learn $\phi_1$, and then $\phi_2$ (for simplicity, say with the same confidence $\chi$). 
Suppose we learn $\phi_1$ (with confidence $\chi_1$), and then $\phi_2$ (with confidence $\chi_2$). 
% Is this the same as learning $\phi_2$ and then $\phi_1$? 
Is this the same as learning them in the opposite order?
This is true for Shafer (\cref{ex:shafer}), which 
is perhaps a reason to find belief functions attractive---but, in general, 
observing inputs in different orders yields different results.
Humans tend to have a recency bias: 
more recent observations have a stronger influence on beliefs;
this property is also readily seen in \cref{ex:prob-simple,ex:kalman}.
But often, such as when we receive both pieces of information at
once, we would like to update using that combined information.
% It turns out that there is already a natural way to do this, even
It turns out that we already have the tools 
to do this in a natural way, even
% the update function 
if $\phi_1$ and $\phi_2$ do not commute.  
% if the order in which one observes $\phi_1$ and $\phi_2$ matters.
% if $\phi_1$ and $\phi_2$ do not commute.  

% We now investigate 
We now turn to an equivalent representation of 
flow
% or fractional 
update functions, which, among
other things, will ultimately
yield a natural way of
% orderlessly combine observations, and 
orderlessly learning $\phi_1$ and $\phi_2$ together, weighted by relative
confidence. 
%
At a technical level, we show how to
extend an arbitrary update function $F$, that handles inputs $\Phi$,
% to a set $\ext\Phi \supseteq \Phi$ with some algebraic operations.
to handle a more expressive set of inputs $\ext\Phi \supseteq \Phi$
closed under new operations of
orderless combination ($\oplus$), and rescaling by relative confidence ($\cdot$).

In \cref{ax:diffble}, we assumed that $\Theta$ has a differentiable
structure; thus, it makes sense to talk about its tangent space
%joe3: please add parens!
%oli3: parens are non-standard in this context. See Lee 2013,
%   which is the standard reference on manifolds, or even the
%   wikipedia page.
$T\Theta$,
which consists of pairs $(\theta, \mat v)$ where
$\theta \in \Theta$, and $\mat v$,
% is intuitively an infinitessial direction rooted at $\theta$.
intuitively, is a direction that one can travel in $\Theta$ beginning at $\theta$
% tangent to $\Theta$
% rooted at $\theta$
\parencite[\S3]{lee2013smooth}.
% structure; for ease of presentation, suppose that it is an $n$-dimensional manifold \parencite{lee2013smooth}.
% For a smooth manifold $M$ (such as the space $\Delta \X$ of distributions over $\X$),
% and a point $p \in M$, we follow convention by writing $T_p M$ for the tangent space to $M$ at point $p$ \parencite{lee2013smooth}, and % $TM := \sqcup_{p \in M} (p, T_p M)$
% $TM := \sum_{p \in M} T_p M$ for the full tangent bundle over $M$.
%
% A vector field over $M$ is a smooth map $\mat v : M \to T M$ assigning a tangent vector $\mat v(p) \in T_p M$, to every point $p \in M
%
A vector field over $\Theta$ is then a
% smooth
differentiable
map $X : \Theta \to T \Theta$
assigning a tangent vector $X(\theta) = (\theta, \mat v) \in T\Theta$
to every $\theta \in \Theta$.
\commentout{
	The set of all vector fields over $\Theta$ is denoted $\mathfrak X(\Theta)$
	 % and is closed under linear combination
	 and forms a vector space.
	\parencite[\S8]{lee2013smooth}
	\unskip.
	}

% There is a close relationship between additive confidence and such vector fields.
There is a close relationship between additive confidence and such vector fields.
% A vector field is called \emph{complete} if it generates a global flow.
% , or equivalently, a smooth section of the projection map $\pi : T M \to M$, where $\pi((p, v)) = p$.
\cref{ax:additivity}
% (and indeed even \cref{ax:seq-for-more})
implies that the behavior of a flow update function is determined by the
way it handles updates of small confidence.
So, in a sense, the only thing we need to know about
a flow update function is how it handles infinitesimal confidences,
which is to say, its derivative at zero confidence---which can be
viewed as a vector field.
More precisely,
given a flow update function $F$, and observation $\phi$,
we can define the \emph{vector field of $\phi$} by
% differential of $F_\phi$
% intuitively represents an update with infinitessimal confidence,
% and is a vector field
\begin{equation}
	F'_\phi
	:=
	\theta \mapsto
	\frac{\partial}{\partial \chi} F_{\phi}^{\chi}(\theta) \Big|_{\chi=0}
	\qquad\in  \mathfrak X(\Theta)
	.
	\label{eq:f-field}
\end{equation}
Moreover, we can recover $F_\phi$ via the integral curves of $F'_\phi$.
% In other words, if we knew only the vector field $F'_\phi$,
% we could
% because $F_\phi$ is the unique function satsifying \eqref{eq:f-field}.

\begin{fact}[{\citeauthor[Thm 9.12]{lee2013smooth}}]
	If $X \in \mathfrak X(\Theta)$,
	there is at most one
	 % function
	$f : [0,\infty) \times \Theta \to \Theta$
	such that for all $\theta \in \Theta$ and $a,b\ge 0$,
	\[
	f(a, f(b, \theta)) = f(a+b,\theta)
		~~\text{and}~~
	\frac{\partial}{\partial \chi}
	 	f(\chi,\theta)
		% \underset{\chi=0}|
		\Big|_{\chi{=}0}
		\!\!= X(\theta)
		.
	\]
	\label{fact:unique-integral-curves}
\end{fact}
\begin{coro}
	% Suppose $X_\phi \in \mathfrak X(\Theta)$ be a vector field.
	% Let $F$ be a flow-update rule.
	% Suppose $X_\phi \in \mathfrak X(\Theta)$ be a vector field.
	% Then there is at most one flow-update function satisfying \eqref{eq:f-field}.
	% Fix the vector field $X := F'_{\phi}$.
	% $F$ is the only flow-update rule satisfying \eqref{eq:f-field}.
	%%%v1
	% Let $F$ be a flow update function, and fix the vector field $F'_{\phi}$.
	% Then $F_\phi$ is the only flow update function satisfying \eqref{eq:f-field}.
	%%%v2
	If $F_{\phi_1}$ and $F_{\phi_2}: [0,\infty) \times \Theta \to \Theta$ are distinct,
	then so are $F'_{\phi_1}$ and $F'_{\phi_2}
	 % \in \mathfrak X(\Theta)
	$.
	%%%v3
	% If $F$ is the uni
	\label{fact:unique-flow-for-vfield}
\end{coro}

% Therefore, \cref{theorem:add-reparam}
Thus, every flow update function $F$ can be equivalently represented
by its differential $F'$, a collection of vector fields.
% \begin{prop}
% 	% Let $F$ be a flow-update rule.
% 	% Then, there is a bijective correspondence between
% 	There is a biective correspondence between
% 	flow-update rules
% 	% $F : \Phi \times[0,\infty] \times \Theta \to \Theta$.
% 	and
% 	$\Phi$-indexed families of vector fields $X : \Phi \to \mathfrak X(\Theta)$.
% % Every update rule $F : \Phi \times \mathbb R \to (\Theta  \to \Theta)$
% % satisfying \cref{ax:zero,ax:additivity,ax:diffble} corresponds to a unique
% % $\Phi$-indexed collection of vector fields
% %     $F' : \Phi \times \Theta \to T\Theta$
% \[
% 	X()
% \]
% \end{prop}
% \begin{coro}\label{thm:vecrep}
%     There is a natural bijection between
%     % update rules $F : \Phi \times \mathbb R \to \Delta \X \to \Delta \X$
%     update rules $F : \Phi \times \mathbb R \to (\Theta  \to \Theta)$
%         satisfying \cref{ax:zero,ax:additivity,ax:diffble},
%     and $\Phi$-indexed collections of complete vector fields
%         % $\{ F'_\phi : \Delta X \to T \Delta X \}_{\phi \in \Phi}$%
%         % $\{ F'_\phi : \Theta \to T \Theta \}_{\phi \in \Phi}$%
%         $ F' :  \Phi \times \Theta \to T \Theta$%
%         % $F' : \Phi \to \Delta\X \to T\Delta \X$%
%     .
% \end{coro}
% In the language of
%
% Not all vector fields can be integrated to get an update function
%
% \begin{coro}\label{thm:vecrep}
% There is a bijective correspondence between udpate rules satisfying \cref{ax:zero,ax:additivity,ax:diffble} and $\Phi$-indexed collections of \textbf{complete} vector fields.
% \end{coro}
% We call $F'$ the \emph{vector field representation} of an update function $F$.
% This vector field representation of an update function
It may seem counter-intuitive that $F'_\phi$,
which no longer explicitly mentions confidence at all,
can capture confidence.
 % In a sense, it does so by specifying
But it does---in a sense, by specifying
everything about the update \emph{except} for the degree of confidence.
This vector field representation is useful for two reasons:
at a practical level, it gives us a natural extension of $\Phi$
that allows us to deal with ``mixtures'' of observations, which commonly arise.
At a deeper level, it will enable us to describe and classify
the flow update functions on $\Theta$.
% Having separated the confidence from the mechanics of the update,
% this vector field representation allows us to describe and
% classify update functions on $\Theta$

% \subsection{Orderless Combination of Observations}
% \textbf{Orderless Combination of Observations.}
One important feature of vector fields is that they
% form a vector space, and so
can be linearly combined to form new vector fields.
% Therefore, flow update functions,
Since in the presence of a flow update function,
observations correspond to vector fields,
observations also inherit this structure.

% The first way of combining
From scalar multiplication, we get a way of rescaling
inputs
% $\phi$
by a ``relative confidence'' $k
 % \in [0,\infty)
$.
% \begin{prop}
	% Suppose $F$ is a flow udpate rule.
% For $\phi \in \Phi$, we can extend $F$
Concretely, given $\phi \in \Phi$ and $k \in [0,\infty)$,
 % given $k$ and $\phi$,
define a new observation
% $k\cdot\phi \in \ext\Phi$
$k\cdot\phi$
% and extend $F$ to handle it by:
and extend $F$ to
a function $\ext F$ that handles it by:
\[
	F^{\chi}_{k\cdot\phi}(\theta) := F^{k\chi}_{\phi}(\theta)
	,\quad\text{or equivalently,}\quad
	F'_{k\cdot \phi} := k F'_{\phi}
	.
\]
% \end{prop}
Note that if $k > 0$, the rescaled input
$k\cdot \phi$ behaves the same way that $\phi$
does for extreme values of confidence,
since $k \cdot 0 = 0$ and $k \cdot \infty = \infty$.

% In this way, the set $\Phi$ inherits
% the additivity of the update rule in the form of scalar multiplication.
% It turns out more is possible: updates inherit the entire vector space structure.


% The second way of combining propositions is to ``observe them concurrently''.
% The second way we
% Making use of vector field addition, we can also
From vector field addition, we get a natural way to combine observations.
Up to now, we have only been able to combine observations provided they are
both of the same input $\phi$ (e.g., via \cref{ax:additivity}).
The vector field representation allows us to do this for distinct inputs.
% This can be quite powerful.
% In particular, given \cofunc s $F, G : \mathbb R \to \Theta$, we can define
% $F \oplus G$ via the vector field $(F \oplus G)' = F' + G'$.
% \begin{defn}
% 	For $\phi_1, \phi_2 \in \Phi$, we extend $F$ to
% 	$\phi_1 \oplus \phi_2$
% \end{defn}
Concretely,
given $\phi_1, \phi_2 \in \Phi$, we can form a new input
% $\phi_1 \oplus \phi_2 \in \ext \Phi$,
$\phi_1 \oplus \phi_2$
and extend $F$ to handle it by taking
% $F'_{\phi_1 \oplus \phi_2} := F'_{\phi_1} + F'_{\phi_2}$.
its vector field
$F'_{\phi_1 \oplus \phi_2}$
to be the sum $F'_{\phi_1} + F'_{\phi_2}$ of the vector fields of $\phi_1 $ and $\phi_2$.
Unlike before, there is no easy way to describe the
flow update function
$F_{\phi_1\oplus\phi_2}$ directly,
but \cref{fact:unique-integral-curves} implies that there's a unique
such function, if it exists.
We now prove that it does, except possibly for full confidence.
% limits to a point,
% allowing it to be continously extended to $\infty$.

\begin{prop}
	If $F$ is a flow update function, and $\phi_1, \phi_2 \in \Phi$,
	then there exists a (unique) function
	$F_{\phi_1 \oplus \phi_2}
	 	: [0, \infty) \times \Theta \to \Theta$
	such that
	$F'_{\phi_1 \oplus \phi_2} = F'_{\phi_1} + F'_{\phi_2}$.
\end{prop}
% \begin{proof}
%
% \end{proof}
The problem is that there may not be any way to continuously extend
% this function to handle full confidence (\cref{ax:cont}).
% In \cref{sec:loss-repr}, we will see another representation
% of confidence functions, predicated on condition
% sufficient to ensure that this combination is always defined;
% for now, we leave $\phi_1 \oplus \phi_2$
% undefined if $\lim_{t \to \infty} F^{t}_{\phi_1 \oplus \phi_2}$ does not exist.
this function to handle full confidence---that is,
$\lim_{\chi \to \infty} F^{\chi}_{\phi_1 \oplus \phi_2}$ may not exist.
Thus, it may not be meaningful to observe $\phi_1 \oplus \phi_2$ with full confidence.
For now, we leave $\phi_1 \oplus \phi_2$ undefined in such cases,
but in \cref{sec:loss-repr}, we will see another representation
of certain update rules predicated on a condition sufficient
to ensure that these limits do exist, and $\oplus$ is always defined.

% Inputs $\phi_1$ and $\phi_2$ are said to \emph{commute} if
% $F_{\phi_1}^{\chi_1} \circ F_{\phi_2}^{\chi_2} \ne  F_{\phi_2}^{\chi_2} \circ F_{\phi_1}^{\chi_1}$ for all $\chi_1, \chi_2$.

\begin{prop}
	% For $\bot < \chi_1, \chi_2  < \top$,
	If $F$ is a flow update function and $\phi_1, \phi_2 \in \Phi$,
	% , and $\chi_1, \chi_2, \chi_1', \chi_2' \in (\bot, \top)$,
	then the following are equivalent:
	\begin{enumerate}
		\item $F_{\phi_1}^{\chi_1} \circ F_{\phi_2}^{\chi_2} =  F_{\phi_2}^{\chi_2} \circ F_{\phi_1}^{\chi_1}$
		for some $\chi_1, \chi_2 \notin \{\bot,\top\}$.
		% \item $F_{\phi_1}^{\chi_1'} \circ F_{\phi_2}^{\chi_2'} =  F_{\phi_2}^{\chi_2'} \circ F_{\phi_1}^{\chi_1'}$
		\item $F_{\phi_1}^{\chi_1} \circ F_{\phi_2}^{\chi_2} =  F_{\phi_2}^{\chi_2} \circ F_{\phi_1}^{\chi_1}$
		for all $\chi_1, \chi_2 \notin \{\bot,\top\}$.

		\item The vector fields $F'_{\phi_1}$ and $F'_{\phi_2}$ commute.
		% i.e.,
		% $F'_{\phi_1}(F'_{\phi_2}(f)) = F'_{\phi_2}(F'_{\phi_1}(f))$ for every smooth function $f$.

		\item
			% $\phi_1\oplus\phi_2$ is defined and
			For all $\chi \in [0,\infty)$,
			$F^{\chi}_{\phi_1} \circ F^{\chi}_{\phi_2} = F^\chi_{\phi_1\oplus\phi_2}$.
	\end{enumerate}
	If these (equivalent) conditions hold, then $\phi_1$ and $\phi_2$ are said to \emph{commute}.
\end{prop}

Note that $\phi_1 \oplus \phi_2 = \phi_2 \oplus \phi_1$ when either is
defined, so $\oplus$ provides a way of combining observations
orderlessly, even in cases where $\phi_1$ and $\phi_2$ do not commute.
% (that is, ).
% And when $\phi_1$ and $\phi_2$
% already do not depend on order, $\phi_1\oplus \phi_2$ has the same effect
% as $\phi_1$ followed by $\phi_2$.
And, when they do commute, the combined observation $\phi_1\oplus \phi_2$
is equivalent to observing $\phi_1$ and $\phi_2$ in either order.

\begin{prop}
	% If $\phi_1$ and $\phi_2$ commute
	% (i.e., $F^{\chi}_{\phi_1} \circ F^{\chi}_{\phi_2} =
	%  	F^{\chi}_{\phi_2} \circ F^{\chi}_{\phi_1}$ for all $\chi$)
	% \unskip, then both are equal to $F^{\chi}_{\phi_1 \oplus \phi_2}$
	% for all $\chi
	%  % \in [0,\infty]
	%  $.
	If $F^{\chi}_{\phi_1} \circ F^{\chi}_{\phi_2} =
	 	F^{\chi}_{\phi_2} \circ F^{\chi}_{\phi_1}$,
	% both equal
	then both updates are equal to
	 $F^{\chi}_{\phi_1 \oplus \phi_2}$. % can add \! before period to fit on one line.
	\commentout{That is,
	\[
		F^{\chi}_{\phi_1}( F^{\chi}_{\phi_2}(\theta))
		=
		F^{\chi}_{\phi_2 \oplus \phi_1} (\theta)
		=
		F^{\chi}_{\phi_1 \oplus \phi_2} (\theta)
		=
		F^{\chi}_{\phi_1}( F^{\chi}_{\phi_2}(\theta))
		.
	\]}
\end{prop}

Intuitively, $\phi_1 \oplus \phi_2$ is a ``mixture observation'' containing
one part $\phi_1$ and one part $\phi_2$. This intuition is made
precise by the following proposition,
which shows $\phi_1\oplus\phi_2$ is equivalent to an infinitely
fine interleaving of $\phi_1$ and $\phi_2$ updates.

\begin{prop}
	Let $\phi_1, \phi_2 \in \Phi$ be inputs.
	For $t > 0$ and $n \in \mathbb N$, let
	$u_t := F_{\phi_1}^t \circ F_{\phi_2}^t
	% : \Theta \to \Theta
	$
 	represent
	a confidence-$t$ update of $\phi_2$ followed
	by a confidence-$t$ update of $\phi_1$,
	% an update with $\phi_1$ followed by an update with $\phi_2$,
	% both made with confidence $t$
	and
	$u_t^{(n)}(\theta) := u_t \circ\ldots\circ u_t(\theta)$
	denote $n$ sequential applications of $u_t$ to $\theta$.
	% Symmetrically, let $v_t$ and $v_t^{(n)}$ represent the
	Then,
	\[
		F_{\phi_1 \oplus \phi_2}^\chi(\theta) =
			\lim_{n \to \infty} u_{\chi/n}^{(n)}(\theta)
		%%%v1
		% F_{\phi_1 \oplus \phi_2}^\chi =
		% \lim_{n \to \infty}~~
		% \overbrace{u_{\nf \chi n}\circ u_{\nf \chi n} \circ\cdots\circ
		% 	u_{\nf \chi n}}^{\text{$n$ times}}
		%%%v2
		%  (F^{\frac\chi n}_{\phi_1} \circ F^{\frac\chi n}_{\phi_2})
		%  \circ
		%  (F^{\frac\chi n}_{\phi_1} \circ F^{\frac\chi n}_{\phi_2})
		%  \circ
		%  \cdots
		%  \circ
		%  (F^{\frac\chi n}_{\phi_1} \circ F^{\frac\chi n}_{\phi_2})
		.
	\]
\end{prop}

% \bigskip
% \verb|< END OF COMPREHENSIBLE DRAFT >|
% \clearpage

% \subsection{Commutative Update Rules}
%
% All differentiable update rules are ``locally'' commutative, in the sense that the difference between
% $F_{\phi_1}^\epsilon \circ F_{\phi_2}^\epsilon$ and
% $F_{\phi_2}^\epsilon \circ F_{\phi_1}^\epsilon$ goes to zero as $\epsilon \to 0$.
% This is an immediate consequence of differentiability and the fact that they share a limit point (the identity function).
%
% If we fix a commutative and differentiable update rule $F$, and an initial point $\theta_0$, then the space $\mathbb R^\Phi$ of real-valued vectors over $\Phi$,
% serves as a coordinate system for $\Theta$.

%
% Not all update rules of interest are commutative, even if otherwise well-behaved.
%
% \begin{example}
%     The inconsistency-reduction update rule, $\tau$, is not commutative, but it is differentiable, additive, invertable, and even conservative.
% \end{example}
