\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[preprint]{uai2023} % after acceptance, for a revised
% version; also before submission to
% see how the non-anonymous paper
% would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
% ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{bmpsize}
\usepackage{booktabs}
\hypersetup{
    colorlinks=true,
    linkcolor=blue,
    urlcolor=blue,
    citecolor=blue,
    anchorcolor=blue}

% \usepackage{xr}

% % Command from here: https://www.overleaf.com/learn/how-to/Cross_referencing_with_the_xr_package_in_Overleaf#When_should_I_use_the_xr_package?
% \makeatletter
% \newcommand*{\addFileDependency}[1]{% argument=file name and extension
% \typeout{(#1)}% latexmk will find this if $recorder=0
% % however, in that case, it will ignore #1 if it is a .aux or 
% % .pdf file etc and it exists! If it doesn't exist, it will appear 
% % in the list of dependents regardless)
% %
% % Write the following if you want it to appear in \listfiles 
% % --- although not really necessary and latexmk doesn't use this
% %
% \@addtofilelist{#1}
% %
% % latexmk will find this message if #1 doesn't exist (yet)
% \IfFileExists{#1}{}{\typeout{No file #1.}}
% }\makeatother

% \newcommand*{\myexternaldocument}[1]{%
% \externaldocument{#1}%
% \addFileDependency{#1.tex}%
% \addFileDependency{#1.aux}%
% }
% \myexternaldocument{appendix/appendix.tex}

% Attempt to make hyperref and algorithmic work together better:
% \theHalgorithm is defined as the plain arabic value of the `algorithm`
% counter (hyperref's \theH<counter> naming convention).
\newcommand{\theHalgorithm}{\arabic{algorithm}}

% For theorems and such
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
\usepackage{algorithmic,algorithm}

% if you use cleveref..
%\usepackage[capitalize,noabbrev]{cleveref}
\usepackage[round]{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}

\usepackage[inline]{enumitem}
\usepackage[table, dvipsnames]{xcolor}
\usepackage{color}

\usepackage{amsmath}
\usepackage{float}
\usepackage{adjustbox}
\usepackage{caption} 
% \usepackage{subcaption}
% \usepackage{subfig}
\usepackage{mathtools, nccmath}
\usepackage{tikz}
\usepackage[algo2e, ruled,vlined,boxed,linesnumbered]{algorithm2e}
%\usepackage[noend]{algorithmic}
\SetArgSty{textnormal}
\usepackage{listings}
\usepackage{multicol}
\usepackage{wrapfig}
\usepackage{enumitem}
\usepackage{makecell}
\usepackage{upgreek}

\usepackage[toc,page]{appendix}      % for appendix

\usepackage{amsmath}		% for AMS macros
\usepackage{amssymb}		% for AMS symbols
\usepackage{amsfonts}		% for AMS fonts
\usepackage{amsthm}		% for theorems

\usepackage{mathtools}		% for advanced math
\usepackage{dsfont}		% for blackboard bold font

\usepackage{acronym}		% for acronyms

% Helpers on top of the `acronym` package:
%   \acli / \aclip   -- long form (singular / plural) set in italics
%   \acdef / \acdefp -- italic long form followed by the upright short form in
%                       parentheses; the acronym is then marked as used
\renewcommand*{\aclabelfont}[1]{\acsfont{#1}}		% acronym labels use the short-form font
\newcommand{\acli}[1]{\textit{\acl{#1}}}		% for italicized acro
\newcommand{\aclip}[1]{\textit{\aclp{#1}}}		% for italicized acro (plural)
\newcommand{\acdef}[1]{\textit{\acl{#1}} \textup{(\acs{#1})}\acused{#1}}		% for acro def
\newcommand{\acdefp}[1]{\textit{\aclp{#1}} \textup{(\acsp{#1})}\acused{#1}}	% for acro def (plural)

% Run-in paragraph headings: \para{Title} issues a \smallskip and then a
% \paragraph whose title is set in bold and terminated by \afterhead.
\newcommand{\afterhead}{.}		% punctuation appended to \para headings
\newcommand{\ackperiod}{}		% for period bug in acknowledgments
\usepackage{titlesec}
\newcommand{\para}[1]{\smallskip\paragraph{\textbf{#1\afterhead}}}


\usepackage{quoting}			% for managing spaces with quotations
\quotingsetup{vskip=\medskipamount}

\usepackage{stmaryrd}		% for extra symbols
\usepackage{wasysym}		% for extra symbols

\usepackage{booktabs}		% for better tables

\usepackage[sort&compress,capitalize,nameinlink]{cleveref}		% for cleveref formatting
% cleveref reference names and label formats for the custom label types.
\crefname{assumption}{Assumption}{Assumptions}
%\crefname{algorithm}{Alg.}{Algs.}
\crefname{algo}{Algorithm}{Algorithms}
\crefname{example}{Example}{Examples}
\crefname{method}{Method}{Methods}
% NOTE: \newcommand (not \renewcommand) is the form the cleveref manual
% prescribes for overriding the range conjunction in the preamble.
\newcommand{\crefrangeconjunction}{\textendash}		% for cleveref conjunctions
%\crefrangeformat{equation}{\upshape(#3#1#4)\textendash(#5#2#6)}
\creflabelformat{assumption}{\upshape(#2#1#3\upshape)}		% upright parentheses around the label

\crefname{assumptionenum}{Assumption}{Assumptions}
\creflabelformat{assumptionenum}{#2#1#3}		% bare label, no parentheses

% `item` and `eq` references print the label only (empty reference name).
\crefname{item}{}{}
\creflabelformat{item}{#2#1#3}

\crefname{eq}{}{}
\creflabelformat{eq}{\upshape(#2#1#3\upshape)}


% \def\endenv{\hfill{\small$\blacktriangle$}}
\usepackage{thmtools}		% for theorem tools
\usepackage{thm-restate}		% for restating theorems

% Theorem-like environments. Each one is declared with its own independent
% counter (no shared-counter or within-section argument is given).
\newtheorem{theorem}{Theorem}		% for theorems
\newtheorem{corollary}{Corollary}		% for corollaries
\newtheorem{lemma}{Lemma}		% for lemmas
\newtheorem{proposition}{Proposition}		% for propositions

\newtheorem{conjecture}{Conjecture}		% for conjectures
\newtheorem{claim}{Claim}		% for claims

% \newtheorem{example}{{\small$\blacktriangledown$} Example}		% for examples
% \newtheorem{algo}{{\small$\blacktriangledown$} Algorithm}	
% Draft markers: bold, upright, red "[??]" / "[!!]" followed by \xspace.
% NOTE(review): \xspace requires the xspace package, which is not loaded
% explicitly in the visible preamble -- presumably another package provides it.
\newcommand{\needref}{{\color{red}\upshape\textbf{[??]}}\xspace}	% for missing refs
\newcommand{\attn}{{\color{red}\upshape\textbf{[!!]}}\xspace}		% for attention

% \debug wraps the body of every macro created through \newmacro / \newop.
% The active definition is the identity; the commented variant colours the
% macro output instead.
%\newcommand{\debug}[1]{{\color{MyRed}#1}}		% for macro coloring
\newcommand{\debug}[1]{#1}		% for removing macro coloring

% \newcommand{\commtag}[1]{\tag*{\small\{#1\}}}

% new commands
% \colcircle{<color>}: small filled TikZ circle (radius .5ex) drawn in <color>.
\newcommand{\colcircle}[1]{\tikz\draw[#1, fill=#1] (0,0) circle (.5ex);}
% Colour palette. NOTE: `blue`, `green` and `red` below replace the standard
% colours of the same names, so every later use of those names (including the
% \hypersetup link colours and \needref/\attn) picks up these custom shades.
\definecolor{darkblue}{HTML}{1A254B}
\definecolor{lightblue}{HTML}{A7BED3}
\definecolor{blue}{HTML}{114083}
\definecolor{green}{HTML}{81B5AE}
\definecolor{pink}{HTML}{F2545B}
\definecolor{red}{HTML}{A4243B}
\definecolor{airforceblue}{rgb}{0.36, 0.54, 0.66}
\definecolor{thistle}{rgb}{0.85, 0.75, 0.85}
\definecolor{ticklemepink}{rgb}{0.99, 0.54, 0.67}
\definecolor{thulianpink}{rgb}{0.67, 0.24, 0.43}
\definecolor{tealblue}{rgb}{0.11, 0.36, 0.43}
% Text highlighting shorthands: \bl -> tealblue, \rl -> thulianpink.
\newcommand{\bl}[1]{\textcolor{tealblue}{#1}}
\newcommand{\rl}[1]{\textcolor{thulianpink}{#1}}


% math definitions
% Basic math shorthands. The \def-based ones overwrite any earlier definition
% of the same name without warning.
\newcommand{\defeq}{\vcentcolon=}		% "defined as" (vertically centred colon)
%\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
\def\Pcal{\mathcal{P}}
\def\Ncal{\mathcal{N}}
\def\bP{\mathbf{P}}
\def\bC{\mathbf{C}}
\def\RR{\mathbb{R}}		% same symbol as \rset below
\def\cW{\overline{W}_{\varepsilon}}
\def\We{W_{\varepsilon}}
\def\ones{\mathbf{1}}
\DeclarePairedDelimiterX{\dotp}[2]{\langle}{\rangle}{#1, #2}		% \dotp{a}{b} -> <a, b>
% Limit-style operators; all three argmin/argmax variants print without a thin space.
\DeclareMathOperator*{\argminB}{argmin}
\DeclareMathOperator*{\argmaxB}{argmax} 
\DeclareMathOperator*{\argmin}{argmin} 
\def\rset{\mathbb{R}}		% same symbol as \RR above
\def\rmd{\mathrm{d}}		% upright d
\def\bfX{\mathbf{X}}
\def\Leb{\mathrm{Leb}}
\newcommand{\expe}[1]{\mathbb{E}[#1]}		% \expe{X} -> E[X] with fixed-size brackets


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MACROS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%----------------------------------------------------------------------
%%% MACROS
%----------------------------------------------------------------------
% !TEX root = ./Main.tex


%**********************************************************************
%***    MACROS: GENERAL
%**********************************************************************
% \newmacro{\name}{body}: defines a parameterless \name whose body is wrapped in \debug.
% \newop{\name}{text}: same, but declared through \DeclareMathOperator.
\newcommand{\newmacro}[2]{\newcommand{#1}{\debug{#2}}}		% for shorthand definitions
\newcommand{\newop}[2]{\DeclareMathOperator{#1}{\debug{#2}}}		% for shorthand definitions

%----------------------------------------------------------------------
%% Delimiters
%----------------------------------------------------------------------
% Paired delimiters declared with mathtools. Each command also accepts the
% starred form (auto-sized) and an optional size argument such as [\big].
\DeclarePairedDelimiter{\braces}{\{}{\}}		% for braces
\DeclarePairedDelimiter{\bracks}{[}{]}		% for brackets
\DeclarePairedDelimiter{\parens}{(}{)}		% for parentheses

\DeclarePairedDelimiter{\abs}{\lvert}{\rvert}		% for absolute value
\DeclarePairedDelimiter{\ceil}{\lceil}{\rceil}		% for ceiling
\DeclarePairedDelimiter{\floor}{\lfloor}{\rfloor}		% for floor
\DeclarePairedDelimiter{\clip}{[}{]}		% for clipping
\DeclarePairedDelimiter{\negpart}{[}{]_{-}}		% for negative part
\DeclarePairedDelimiter{\pospart}{[}{]_{+}}		% for positive part

\DeclarePairedDelimiterX{\inner}[2]{\langle}{\rangle}{#1, #2}		% for scalar product
%\DeclarePairedDelimiterX{\inner}[2]{\langle}{\rangle}{#1,#2}		% for scalar product

\DeclarePairedDelimiter{\norm}{\lVert}{ \rVert}		% for norm
\DeclarePairedDelimiterXPP{\twonorm}[1]{}{\lVert}{\rVert}{}{#1}		% for L2 norm (no subscript is added)
\DeclarePairedDelimiterXPP{\dnorm}[1]{}{\lVert}{\rVert}{_{\ast}}{#1}		% for dual norm
%\newcommand{\dnorm}[1]{\norm{#1}_{\ast}}		% for dual norm

\DeclarePairedDelimiter{\bra}{\langle}{\rvert}		% for bras
\DeclarePairedDelimiter{\ket}{\lvert}{\rangle}		% for kets
\DeclarePairedDelimiterX{\braket}[2]{\langle}{\rangle}{#1,#2}		% for brakets (comma-separated; bar variant below)
%\DeclarePairedDelimiterX{\braket}[2]{\langle}{\rangle}{#1\mathopen{}\delimsize\vert\mathopen{}#2}

\DeclarePairedDelimiterX{\setdef}[2]{\{}{\}}{#1:#2}		% for set builder notation
\DeclarePairedDelimiterXPP{\exclude}[1]{\mathopen{}\setminus}{\{}{\}}{}{#1}		% for "\setminus {...}"


%----------------------------------------------------------------------
%% Modifiers
%----------------------------------------------------------------------
% Alternate version of a symbol: appends a prime.
\newcommand{\alt}[1]{#1'}


%----------------------------------------------------------------------
%% Number fields
%----------------------------------------------------------------------
% Blackboard-bold letters for the standard number systems.
\newcommand*{\F}{\mathbb{F}}  % generic field
\newcommand*{\N}{\mathbb{N}}  % natural numbers
\newcommand*{\Z}{\mathbb{Z}}  % integers
\newcommand*{\Q}{\mathbb{Q}}  % rational numbers
\newcommand*{\R}{\mathbb{R}}  % real numbers
\newcommand*{\C}{\mathbb{C}}  % complex numbers (name may clash with other packages)

%----------------------------------------------------------------------
%% Operators
%----------------------------------------------------------------------
% Named math operators. Starred declarations place sub/superscripts as limits
% in display style; unstarred ones place them to the side.
\DeclareMathOperator*{\argmax}{arg\,max}		% for argmax
%\DeclareMathOperator*{\argmin}{arg\,min}		% for argmin
\DeclareMathOperator*{\intersect}{\bigcap}		% for intersections
\DeclareMathOperator*{\union}{\bigcup}		% for unions

\DeclareMathOperator{\aff}{aff}		% for affine hull
\DeclareMathOperator{\bd}{bd}		% for boundary
\DeclareMathOperator{\bigoh}{\mathcal{O}}		% for Landau O
\DeclareMathOperator{\card}{card}		% for cardinality
\DeclareMathOperator{\cl}{cl}		% for closure
\DeclareMathOperator{\conv}{conv}		% for convex hull (but see also \simplex)
\DeclareMathOperator{\crit}{crit}		% for gap function
\DeclareMathOperator{\diag}{diag}		% for diagonal matrices
\DeclareMathOperator{\diam}{diam}		% for diameter
\DeclareMathOperator{\dist}{dist}		% for distance
\DeclareMathOperator{\dom}{dom}		% for domain
\DeclareMathOperator{\eig}{eig}		% for eigenvalues
\DeclareMathOperator{\ess}{ess}		% for essential
\DeclareMathOperator{\grad}{\nabla}		% for gradient
\DeclareMathOperator{\Hess}{Hess}		% for Hessian
\DeclareMathOperator{\ind}{ind}		% for index
\DeclareMathOperator{\im}{im}		% for image
\DeclareMathOperator{\intr}{int}		% for interior
\DeclareMathOperator{\Jac}{D}		% for Jacobian
\DeclareMathOperator{\one}{\mathds{1}}		% for indicator
\DeclareMathOperator{\proj}{pr}		% for projection
\DeclareMathOperator{\prox}{prox}		% for prox
\DeclareMathOperator{\rank}{rank}		% for rank
\DeclareMathOperator{\relint}{ri}		% for relative interior
\DeclareMathOperator{\sign}{sgn}		% for sign
\DeclareMathOperator{\supp}{supp}		% for support
\DeclareMathOperator{\Sym}{Sym}		% for symmetric
\DeclareMathOperator{\tr}{tr}		% for trace
\DeclareMathOperator{\unif}{unif}		% for uniform distribution
\DeclareMathOperator{\vol}{vol}		% for volume



%----------------------------------------------------------------------
%% Sundries
%----------------------------------------------------------------------
% Miscellaneous shorthands. Macros referenced inside the bodies (\vdim,
% \point, \coord) are only expanded at use time, so they may be defined later.
\newmacro{\coef}{\lambda}		% for coefficient
\newmacro{\dd}{\:\mathrm{d}}		% for integrators (medium space + upright d)
\newmacro{\intR}{\int_{\R^{\vdim}}}		% for integration over full domains
\newmacro{\intRR}{\int_{\R^{\vdim}  \times \R^{\vdim}  }}		% for integration over double full domains
\newmacro{\nn}{\nonumber}		% for equations

\newcommand{\subs}{\leftarrow}      % for substitution


%\newcommand{\ddt}[1]{\frac{d#1}{dt}}		% for Leibniz
\newcommand{\ddt}{\frac{\mathrm{d}}{\mathrm{d}t}}		% for Leibniz (time derivative operator)
\newcommand{\ddc}{\frac{\partial}{\partial \point_{\coord} }}		% for Leibniz (partial derivative w.r.t. coordinate \coord of \point)

\newcommand{\del}{\partial}		% for derivatives
\newcommand{\eps}{\varepsilon}		% for better epsilon
\newcommand{\pd}{\partial}		% for derivatives
\newcommand{\wilde}{\widetilde}		% for wide tildes

\newcommand{\insum}{\sum\nolimits}		% for compact sums
\newcommand{\inprod}{\prod\nolimits}		% for compact products

\newmacro{\pexp}{p}		% for first exponent
\newmacro{\qexp}{q}		% for second exponent
\newmacro{\rexp}{r}		% for third exponent

\newcommand{\dsum}{\oplus}		% for direct sums


\newcommand{\const}{ \mathrm{const.} }		% upright "const."

%----------------------------------------------------------------------
%% Text and formatting
%----------------------------------------------------------------------
% The abbreviations below (and \needref/\attn elsewhere in the preamble) end
% in \xspace. Load xspace explicitly so they do not depend on another package
% pulling it in; the line is a no-op if xspace is already loaded.
\usepackage{xspace}

% Latin abbreviations with automatic trailing-space handling.
\newcommand{\cf}{cf.\xspace}		% for consistency
\newcommand{\eg}{e.g.,\xspace}		% for consistency
\newcommand{\ie}{i.e.,\xspace}		% for consistency
\newcommand{\vs}{vs.\xspace}		% for consistency

% Upright brackets/parentheses around an argument, regardless of the
% surrounding font shape.
\newcommand{\textbrac}[1]{\textup[#1\textup]}		% for upshape brackets
\newcommand{\textpar}[1]{\textup(#1\textup)}		% for upshape parentheses

\newcommand{\dis}{\displaystyle}		% for forcing display style
\newcommand{\txs}{\textstyle}		% for forcing inline style



%----------------------------------------------------------------------
%% Riemannian Manifolds
%----------------------------------------------------------------------
\newmacro{\mfd}{\mathcal{M}}		% for the manifold (the metric tensor is \gmat)
\newmacro{\curve}{\gamma}          % for curves
\newcommand{\ptrans}[3]{ \Gamma_{#1 \rightarrow #2} \left( #3 \right)}      % for parallel transport: \ptrans{from}{to}{vector}
\newmacro{\sect}{\mathcal{K}}    % for sectional curvatures

%**********************************************************************
%***    MACROS: SET THEORY
%**********************************************************************

%----------------------------------------------------------------------
%% Points and sets
%----------------------------------------------------------------------
% Arrows and relation symbols used in function/correspondence declarations.
\newcommand{\from}{\colon}		% for function definition
\newcommand{\too}{\rightrightarrows}		% for correspondences
\newcommand{\injects}{\hookrightarrow}		% for injections
\newcommand{\surjects}{\twoheadrightarrow}		% for surjections

%\newcommand{\defeq}{\coloneqq}		% for direct definition
\newcommand{\eqdef}{\eqqcolon}		% for reverse definition

\newmacro{\sset}{\mathcal{S}}		% for generic set

%\newmacro{\points}{\mathcal{Z}}		% for point set
\newmacro{\points}{\mfd}		% for Riemannian RM (point set is the manifold \mfd)
\newmacro{\intpoints}{\points^{\circ}}		%for point set interior
\newmacro{\point}{x}		% for generic point
\newmacro{\pointalt}{\alt\point}		% for alternate point

\newmacro{\dpoints}{\mathcal{W}}		% for second point set (duals, etc.)
\newmacro{\dpoint}{w}		% for second generic point
\newmacro{\dpointalt}{\alt\dpoint}		% for second alternate variable

\newmacro{\base}{p}		% for reference point
\newmacro{\basealt}{q}		% for alternate reference point

\newcommand{\test}[1][\point]{\hat{#1}}		% for test point (\point by default)
\newcommand{\tests}{\test[\points]}		% for set of test points

\newmacro{\open}{\mathcal{U}}		% for open sets
\newmacro{\closed}{\mathcal{C}}		% for closed sets
\newmacro{\cpt}{\mathcal{K}}		% for compact sets (same symbol as \sect)
\newmacro{\nbhd}{\mathcal{U}}		% for neighborhoods (same symbol as \open)


%**********************************************************************
%*****	MACROS: SEQUENCES AND TIME SERIES
%**********************************************************************

%----------------------------------------------------------------------
%% Basic indices
%----------------------------------------------------------------------
\newmacro{\start}{1}		% for start index
\newmacro{\halfafterstart}{3/2}		% for half-step index after the start
\newmacro{\afterstart}{2}		% for second index
\newmacro{\running}{\start,\afterstart,\dotsc}		% for running index
\newmacro{\halfrunning}{\start,\halfafterstart,\dotsc}		% for running index in half steps

\newmacro{\runalt}{k}		% for running sequence index
\newmacro{\run}{n}		% for main sequence index
\newmacro{\nRuns}{T}		% for total number of runs
\newmacro{\runs}{\mathcal{\nRuns}}		% for set of runs


%----------------------------------------------------------------------
%% Sequences and recursions
%----------------------------------------------------------------------
\newmacro{\state}{Z}		% for main iterate
\newmacro{\dstate}{Y}		% for other iterate

% All macros below take one optional argument: the symbol to decorate.
\newcommand{\avg}[1][\state]{\bar{#1}}		% for averaging (\state by default)
%\newcommand{\avg}[1][\state]{\debug{\bar#1}_{\nRuns}}		% for last ergodic state (\state by default)
\newcommand{\new}[1][\point]{#1^{+}}		% for new iterate (\point by default)

\newcommand{\init}[1][\state]{\debug{#1}_{\start}}		% for initial value (\state by default)
\newcommand{\afterinit}[1][\state]{\debug{#1}_{\afterstart}}		% for second value (\state by default)
\newcommand{\preiter}[1][\state]{\debug{#1}_{\runalt-1}}		% for value at index \runalt-1 (\state by default)
\newcommand{\iter}[1][\state]{\debug{#1}_{\runalt}}		% for value at index \runalt (\state by default)
\newcommand{\afteriter}[1][\state]{\debug{#1}_{\runalt+1}}		% for value at index \runalt+1 (\state by default)
\newcommand{\preprev}[1][\state]{\debug{#1}_{\run-2}}		% for value at index \run-2 (\state by default)
\newcommand{\prev}[1][\state]{\debug{#1}_{\run-1}}		% for previous value (\state by default)
\newcommand{\curr}[1][\state]{\debug{#1}_{\run}}		% for current value (\state by default)
\newcommand{\prelead}[1][\state]{\debug{#1}_{\run-1}^{+}}		% for previous value with a "+" superscript (\state by default)
\newcommand{\lead}[1][\state]{\debug{#1}_{\run}^{+}}		% for current value with a "+" superscript (\state by default)
%\renewcommand{\next}[1][\state]{\debug{#1}_{\run+1}}		% for next value (\state by default)
%\newcommand{\next}[1][\state]{\debug{#1}_{\run+1}}



%**********************************************************************
%***    MACROS: LINEAR ALGEBRA
%**********************************************************************

%----------------------------------------------------------------------
%% Vector spaces
%----------------------------------------------------------------------
\newmacro{\vecspace}{\R^{\vdim}}		% for generic vector space (\vdim is defined just below; expanded at use time)

\newmacro{\coord}{i}		% for index
\newmacro{\vdim}{d}		% for dimension
\newmacro{\vvec}{v}		% for generic vector
\newmacro{\bvec}{e}		% for basis vector
\newmacro{\bvecs}{\mathcal{E}}		% for basis vectors

\newmacro{\subspace}{\mathcal{W}}		% for subspace
\newmacro{\wvec}{w}		% for generic subspace vector
\newmacro{\subdim}{m}		% for subspace dimension

\newmacro{\tanhull}{\mathcal{Z}}		% for tangent hull
\newmacro{\tanvec}{z}		% for tangent vectors


%----------------------------------------------------------------------
%% Duality
%----------------------------------------------------------------------
\newcommand{\dual}[1]{#1^{\ast}}		% for dual variables (appends a superscript \ast)
\newmacro{\dspace}{\dual\vecspace}		% for dual space
\newmacro{\dvec}{v}		% for dual vector (same letter as \vvec)
\newmacro{\dbvec}{\eps}		% for dual basis vectors


%----------------------------------------------------------------------
%% Matrices and vectors
%----------------------------------------------------------------------
%\newmacro{\ones}{\mathbf{1}}		% for vector of ones
% Generic matrix symbols.
\newmacro{\mat}{M}  % generic matrix
\newmacro{\eye}{I}  % identity matrix

% Matrix order relations.
\newcommand*{\mg}{\succ}           % positive-definite
\newcommand*{\mgeq}{\succcurlyeq}  % positive-semidefinite
\newcommand*{\ml}{\prec}           % negative-definite
\newcommand*{\mleq}{\preccurlyeq}  % negative-semidefinite






%**********************************************************************
%***    MACROS: PROBABILITY AND STATISTICS
%**********************************************************************

%----------------------------------------------------------------------
%% Probability
%----------------------------------------------------------------------
\DeclareMathOperator{\ex}{\mathbb{E}}		% for expectations
\DeclareMathOperator{\prob}{\mathbb{P}}		% for probability
\DeclareMathOperator{\Var}{Var}		% for variance
\DeclareMathOperator{\simplex}{\Delta}		% for simplices

\newmacro{\seed}{\omega}		% for seed
\newmacro{\seeds}{\Omega}		% for seed space
\newmacro{\history}{\mathcal{H}}		% for filtrations

\newmacro{\sample}{\omega}		% for samples
\newmacro{\samples}{\Omega}		% for sample space
\newmacro{\filter}{\mathcal{F}}		% for filtrations
\newmacro{\probspace}{(\samples,\filter,\prob)}		% for probability space

\newmacro{\event}{\mathcal{E}}       % for event
\newmacro{\eventalt}{\mathcal{H}}       % for alternate event
\newcommand{\comp}[1]{#1^{\mathtt{c}}}		% for complement

\newmacro{\mean}{\mu}		% for mean of distribution
\newmacro{\sdev}{\sigma}		% for standard deviation of distribution
\newmacro{\variance}{\sdev^{2}}		% for variance of distribution

\newmacro{\dkl}{D_{\mathrm{KL}}}		% for Kullback Leibler
%\newcommand{\as}{\debug{\textpar{a.s.}}\xspace}		% for almost surely
\newcommand{\as}{{{a.s.}}}		% for almost surely


% \given is a placeholder at top level; \exof and \probof redefine it locally
% as a delimiter-sized vertical bar, so it is only meaningful inside them.
\providecommand\given{}		% empty command for conditionals

% \exof{X \given Y}: expectation operator followed by bracketed argument.
\DeclarePairedDelimiterXPP{\exof}[1]{\ex}{[}{]}{}{%		% for conditional expectations
\renewcommand\given{\nonscript\,\delimsize\vert\nonscript\,\mathopen{}} #1}

% \probof{A \given B}: probability operator followed by parenthesised argument.
\DeclarePairedDelimiterXPP{\probof}[1]{\prob}{(}{)}{}{%		% for conditional probabilities
\renewcommand\given{\nonscript\:\delimsize\vert\nonscript\:\mathopen{}} #1}

\newcommand{\oneof}[1]{\one_{\{#1\}}}		% indicator with the event as a braced subscript
%\DeclarePairedDelimiterXPP{\oneof}[1]{\one}{\{}{\}}{}{%		% for conditional expectations
%\renewcommand\given{\nonscript\,\delimsize\vert\nonscript\,\mathopen{}} #1}




%----------------------------------------------------------------------
%% Geometry
%----------------------------------------------------------------------
\newmacro{\gmat}{g}		% for metric tensor
\newmacro{\gdist}{\dist_{\gmat}}		% for the \dist operator subscripted by the metric
\newmacro{\ball}{\mathbb{B}}		% for balls
\newmacro{\sphere}{\mathbb{S}}		% for spheres




%**********************************************************************
%***    MACROS: SCHROEDINGER BRIDGES
%**********************************************************************

% Endpoint Gaussian parameters: index 0 for the first, \horizon for the second.
% (\horizon is defined a few lines below; it is only expanded at use time.)
\newmacro{\mbase}{\mu}
\newmacro{\m}{\mbase_0}     % for mean of the first gaussian
\newmacro{\malt}{\mbase_\horizon}     % for mean of the second gaussian

\newmacro{\covarbase}{\Sigma}
\newmacro{\covar}{\covarbase_0}     % for covariance of the first gaussian
\newmacro{\covaralt}{\covarbase_\horizon}     % for covariance of the second gaussian

\newmacro{\ctime}{t}		% for the time variable
\newmacro{\ctimealt}{s}		% for the alternate time variable
\newmacro{\horizon}{1}		% for the time horizon

\newmacro{\ratiosym}{r}    % for symbol of the ratio between \ctime and \horizon
\newcommand{\ratio}[1][\ctime]{\ratiosym_{#1}}     % for ratio between \ctime and \horizon
\newcommand{\ratioc}[1][\ctime]{\bar{\ratiosym}_{#1}}     % for complementary ratio between \ctime and \horizon




\newmacro{\scalingbase}{\mathrm{QV}}     % for quadratic variation
\newcommand{\scaling}[1][\ctime]{ \scalingbase\parens*{ #1 } } 
\newcommand{\scalingsq}[1][\ctime]{ \scalingbase^2\parens*{ #1 } }

\newmacro{\KLbase}{D_{\mathrm{KL}}}
\newcommand{\KL}[2]{ \KLbase\parens*{ #1 \Vert #2 } }		% \KL{P}{Q} -> D_KL(P || Q)


\newmacro{\sdebase}{ X }
\newcommand{\sde}[1][\ctime]{ \sdebase_{ #1 } }     % for marginal variables
\newcommand{\dsde}[1][\ctime]{ \dd\sdebase_{ #1 } }     % for increments of the marginal variables
%\newmacro{\dXt}{ \dd X_t }     % for SDEs
\newmacro{\tinv}{\tau}

\newmacro{\testfbase}{u}
\newmacro{\generator}{\mathcal{L}_{\ctime}}

%\newcommand{\testf}[2][\ctime]{\testfbase\parens*{#1, #2} }
%\newcommand*\Laplace{\mathop{}\!\mathbin\bigtriangleup}
\newcommand{\Laplace}{\Delta}

\usepackage{xparse}
% \testf[<time>][<point>]: both arguments optional (\ctime and \point by default).
\NewDocumentCommand{\testf}{ O{\ctime} O{\point} }{ \testfbase\parens*{#1,#2} }


\newmacro{\dconst}{\lambda}     % for constant drift
\newmacro{\sconst}{\mathbf{v}}     % for constant shift
\newmacro{\vconst}{\omega}     % for constant volatility


% Extra vertical padding (4pt) for makecell's \thead and \makecell cells.
\renewcommand\theadgape{\Gape[4pt]}
\renewcommand\cellgape{\Gape[4pt]}

\newmacro{\qvbase}{\mathrm{q}}

\newcommand{\qv}[1][\ctime]{\qvbase\parens*{ #1 } }		% evaluated at \ctime by default
\newcommand{\qve}[1][\horizon]{\qvbase\parens*{ #1 } }		% evaluated at \horizon by default
\newcommand{\dqv}[1][\ctime]{ \dot{\qvbase}\parens*{ #1 } }		% dotted variant

\newmacro{\subVPfbase}{ \beta }
\newcommand{\subVPf}[1][\ctime]{ {\subVPfbase}\parens*{ #1 } }

% Uses \aggtimebase, which is defined further down in the preamble; this is
% fine because the body is only expanded when \aggtimeqr is used.
\newcommand{\aggtimeqr}[1][\ctime]{ \aggtimebase^4_{ #1 } }

% Upright `qv` symbol, evaluated at a time argument (\ctime by default).
% \dQV is the dotted variant. It previously called the undefined \patens*,
% a typo for the paired delimiter \parens*.
\newmacro{\QVbase}{ \mathrm{qv} }
\newcommand{\QV}[1][\ctime]{ \QVbase\parens*{ #1 }  }
\newcommand{\dQV}[1][\ctime]{ \dot{\QVbase}\parens*{ #1 }  }

% Drift and shift are written as functions of time, c(t) and \alpha(t);
% volatility is written with a time subscript, g_t. The optional argument of
% each macro overrides the default time symbol.
\newmacro{\driftbase}{  c  }
\newcommand{\drift}[1][\ctime]{ \driftbase\parens*{ #1 }  }

\newmacro{\shiftbase}{  \alpha  }
\newcommand{\shift}[1][\ctime]{ \shiftbase\parens*{ #1 }  }

\newmacro{\volatbase}{  g  }
\newcommand{\volat}[1][\ctime]{ \volatbase_{ #1 }  }
\newcommand{\volatsq}[1][\ctimealt]{ \volatbase^2_{ #1 }  }		% NOTE: default argument is \ctimealt, not \ctime

\newmacro{\refprobase}{\mathbb{Q}}     % for alternative general stochastic processes alphabet
\newcommand{\refpro}[1][\ctime]{\refprobase_{ #1 }}		% marginal at a given time
\newmacro{\refjoint}{\refprobase_{\mathrm{0\horizon}} }    % same alphabet with the upright subscript "0\horizon"
\newcommand{\refapprox}[1][\ctime]{ {\refprobase}^{\solbase}_{ #1 }}		% superscript \solbase is defined later in the preamble

%----------------------------------------------------------------------
%%% OBSOLETE
%%----------------------------------------------------------------------
%\newmacro{\wienerbase}{\mathbb{W}}     % for reversible Wiener processes alphabet
%\newmacro{\Wt}{\wienerbase_{\ctime}}     % for reversible Wiener processes
%\newmacro{\dWt}{ \dd \Wt }     % for Wiener processes increments
%----------------------------------------------------------------------
%%% LINEAR SDES
%----------------------------------------------------------------------



\newmacro{\Wienerbase}{\mathbb{W}}     % for reversible Wiener processes alphabet
\newcommand{\Wiener}[1][\ctime]{ \Wienerbase_{#1} }     % for reversible Wiener processes
\newcommand{\dWiener}[1][\ctime]{ \dd \Wienerbase_{#1} }     % for Wiener processes increments

\newmacro{\aggtimebase}{  \tau  }
%\newcommand{\aggtime}[1][\ctime]{ \aggtimebase\parens*{ #1 }  }
%\newcommand{\aggtimeinv}[1][\ctimealt]{ \aggtimebase^{-1}\parens*{ #1 }  }
%\newcommand{\aggtimesq}[1][\ctimealt]{ \aggtimebase^2\parens*{ #1 }  }
%\newcommand{\aggtimesqinv}[1][\ctimealt]{ \aggtimebase^{-2}\parens*{ #1 }  }
%\newcommand{\daggtime}[1][\ctime]{ \dot{\aggtimebase}\parens*{ #1 }  }

% Subscript-style variants (the function-style ones above are disabled).
% NOTE: \aggtime and \daggtime default to \ctime; the inverse/square variants
% default to \ctimealt. \ssstyle is defined later in the preamble.
\newcommand{\aggtime}[1][\ctime]{ \aggtimebase_{ #1 }  }
\newcommand{\aggtimeinv}[1][\ctimealt]{ \aggtimebase^{\ssstyle -1}_{ #1 }  }
\newcommand{\aggtimesq}[1][\ctimealt]{ \aggtimebase^2_{ #1 }  }
\newcommand{\aggtimesqinv}[1][\ctimealt]{ \aggtimebase^{-2}_{ #1 }  }
\newcommand{\daggtime}[1][\ctime]{ \dot{\aggtimebase}_{ #1 }  }
%\newmacro{\mYbase}{ Y }

\newmacro{\mrsdebase}{ \eta}
\NewDocumentCommand{\mYcinit}{ O{\ctime}  }{  \mrsdebase\parens*{#1 } } %  \middle| \refsde[0] } }

\newmacro{\kernelbase}{ \kappa}
\NewDocumentCommand{\kernel}{ O{\ctime} O{\ctime'}  }{  \kernelbase\parens*{#1, #2} }% \middle| \refsde[0] } }


% Integrals of \aggtimesqinv times \volatsq in \ctimealt, over [0, \ctime]
% and over [0, \horizon] respectively.
\newmacro{\intdasq}{ \int_0^\ctime {\aggtimesqinv[\ctimealt]}{\volatsq} \dd \ctimealt }
\newmacro{\intdasqT}{ \int_0^\horizon {\aggtimesqinv[\ctimealt]}{\volatsq} \dd \ctimealt }


\newcommand{\law}[1]{ \mathrm{law}\parens*{ #1 } }		% upright "law" applied to an argument





% NOTE: \ssstyle relies on \debug being the identity, so that the style
% switch is not enclosed in a group; a colouring \debug would confine it.
\newmacro{\ssstyle}{\scriptscriptstyle}
\newmacro{\sssNcal}{\ssstyle\Ncal}

\newmacro{\solbase}{\star}		% marker used as a superscript on "solution" quantities

\newmacro{\Cstar}{C_{\sdev_{\solbase}}}

\newmacro{\pbase}{\mathbb{P}}     % for general stochastic processes alphabet
%\newmacro{\Pt}{\pbase_{\ctime}}     % for general stochastic processes
\newmacro{\Pinit}{\pbase_{{0}}}     % for initial marginal
\newmacro{\Pend}{\pbase_{{\horizon}}}     % for end marginal
\newcommand{\Pmargin}[1][\ctime]{ \pbase_{{#1}} }     % for general marginals
%\newcommand{\Pro}[1][\ctime]{ \pbase_{#1} }     % for general marginals
\newcommand{\Psol}[1][\ctime]{ \pbase^{\solbase}_{ #1 } }
\newmacro{\Pjoint}{ \pbase_{ \mathrm{0\horizon}} }

\newmacro{\distbase}{ \pbase }
\newmacro{\ini}{ {0} }
\newmacro{\distinit}{ \hat{\distbase}_{ \ini } }
\newmacro{\en}{ {\horizon} }
\newmacro{\distend}{ \hat{\distbase}_{ \en} }


\newcommand{\Xsol}[1][\ctime]{ X^{\solbase}_{ #1 } }

\newcommand{\dXsol}[1][\ctime]{ \dd X^{\solbase}_{ #1 } }

\newcommand{\meansol}[1][\ctime]{ \mu^{\solbase}_{ #1 } }
\newcommand{\dmeansol}[1][\ctime]{ \dot{\mu}^{\solbase}_{ #1 } }
\newcommand{\Sigmasol}[1][\ctime]{ \Sigma^{\solbase}_{ #1 } }
\newcommand{\Sigmasolinv}[1][\ctime]{ \Sigma^{\solbase-1}_{ #1 } }
\newcommand{\dSigmasol}[1][\ctime]{ \dot{\Sigma}^{\solbase}_{ #1 } }

\newcommand{\dratio}[1][\ctime]{\dot{\ratiosym}_{#1}}     % for dotted ratio (presumably its time derivative)
\newcommand{\dratioc}[1][\ctime]{\dot{\bar{\ratiosym}}_{#1}}     % for dotted complementary ratio (presumably its time derivative)


% Conditional variants: \cmeansol{a}{b} carries the subscript "a | b".
\newcommand{\cmeansol}[2]{ \mu^{\solbase}_{ #1 \vert #2 } }
\newcommand{\cSigmasol}[2]{ \Sigma^{\solbase}_{ #1 \vert #2 } }

%\newmacro{\effsc}{\rho}    % for effectively scaling



\newmacro{\efftrbase}{\rho}    % for effectively scaling
\newcommand{\efftr}[1][\ctime]{  \efftrbase_{ #1 }  }
%\newcommand{\deffsc}[1][\ctime]{\dot{\effscbase}\parens*{#1}}
%\newcommand{\scalingsol}[1][\ctime]{ \scalingbase^\star\parens*{ #1 } }
%\newcommand{\scalingsolsq}[1][\ctime]{ \scalingbase^{\star2}\parens*{ #1 } }

% Forward/backward quantities: Z with parameter \paramf (\theta) and
% \hat{Z} with parameter \paramb (\phi).
\newmacro{\paramf}{ \theta }
\newmacro{\SBfbase}{ Z }
%\newmacro{\SBbbase}{ \SBfbase^{\scriptscriptstyle\textup{rev}} }
\newmacro{\paramb}{ \phi }
\newmacro{\SBbbase}{ \hat{\SBfbase} }
% \SBf[<time>][<point>][<param>] and \SBb likewise; all three arguments are
% optional. NOTE: these use unstarred \parens (fixed-size), unlike most other
% macros in this file, which use \parens*.
\NewDocumentCommand{\SBf}{ O{\ctime} O{\point} O{\paramf} }{ \SBfbase_{#1}^{#3}\parens{#2} }
\NewDocumentCommand{\SBb}{ O{\ctime} O{\point} O{\paramb} }{ \SBbbase_{#1}^{#3}\parens{#2} }

\newmacro{\GSBfbase}{ f_{\sssNcal} } %f_{\scriptscriptstyle\textup{GSB}} }
\newmacro{\GSBbbase}{ \hat{\GSBfbase}}%^{\scriptscriptstyle\textup{rev}} }
\NewDocumentCommand{\GSBf}{ O{\ctime} O{\point} }{ \GSBfbase\parens*{#1,#2} }

%\newcommand{\dscaling}[1][\ctime]{ \dot{\scalingbase}\parens*{ #1 } } 

\newmacro{\tshiftbase}{ \zeta }%[1][\ctime]{ \dot{\scalingbase}\parens*{ #1 } } 
\newcommand{\tshift}[1][\ctime]{ \tshiftbase\parens*{ #1 } } 


\newcommand{\Div}[1][\point]{ \nabla_{ {#1} } \cdot }		% divergence w.r.t. the optional argument (\point by default)
\newmacro{\dt}{ \dd \ctime}		% time differential


\newmacro{\loss}{\ell}
%\newcommand{\lossf}{ \loss\parens*{\point_{\horizon};\paramf} }
\NewDocumentCommand{\lossf}{ O{\point_{\horizon}} O{\paramf} }{  \loss\parens*{ #1; #2 }}

%\newcommand{\lossb}{ \loss\parens*{\point_{0};\paramb} }
\NewDocumentCommand{\lossb}{ O{\point_{0}} O{\paramb} }{  \loss\parens*{ #1; #2 }}

% Symbols M, K_out and K_in. The subscripts are braced explicitly so that
% they do not depend on how \textup expands after `_`; this matches the
% braced style of K_{\paramf} used elsewhere in the preamble.
\newmacro{\caching}{M}
\newmacro{\outeriter}{K_{\textup{out}}}
\newmacro{\inneriter}{K_{\textup{in}}}

% K and \gamma symbols indexed by the forward (\paramf) and backward
% (\paramb) parameters.
\newmacro{\pretriterf}{K_{\paramf}}
\newmacro{\pretriterb}{K_{\paramb}}

\newmacro{\lrbase}{\gamma}
\newmacro{\lrf}{\lrbase_{\paramf}}
\newmacro{\lrb}{\lrbase_{\paramb}}


\newmacro{\Ninit}{\Ncal_{\ini}}
\newmacro{\Nend}{\Ncal_{\en}}

\newmacro{\tdriftbase}{ f }
%\newcommand{\tdrift}[1][\ctime]{ \tdriftbase\parens*{ #1 } }

% \tdrift[<time>][<state>]: f(time, state); defaults are \ctime and \refsde.
\NewDocumentCommand{\tdrift}{ O{\ctime} O{\refsde} }{ \tdriftbase\parens*{#1,#2} }

% NOTE: this math macro \SB is distinct from the acronym key `SB` that is
% registered with \newacro elsewhere in the preamble.
\newmacro{\SB}{ {\scriptscriptstyle\textup{SB}} } %{\scriptscriptstyle\mathrm{SB}} }

\newcommand{\pSB}[1]{  p^{\SB}_{#1} }


%Problem setup:
% Problem-setup symbols.
\newcommand*{\T}{\mathbb{T}}    % blackboard-bold T
\newcommand*{\Sp}{\mathbb{S}}   % blackboard-bold S
\newcommand*{\x}{\mathbf{x}}    % bold x
\newcommand*{\Pssol}{\pi^\star} % starred pi

%----------------------------------------------------------------------
%%% ACRONYMS
%----------------------------------------------------------------------
% Acronym registrations: \newacro{KEY}[short form]{long form}. When the
% optional short form is omitted, KEY itself is used as the short form.
\newacro{LHS}{left-hand side}
\newacro{RHS}{right-hand side}
\newacro{iid}[i.i.d.]{independent and identically distributed}
\newacro{lsc}[l.s.c.]{lower semi-continuous}


\newacro{GAN}{generative adversarial network}
\newacro{NN}{neural network}
\newacro{FTRL}{``follow the regularized leader''}
\newacro{wp1}[w.p.$1$]{with probability $1$}


\newacro{SDE}{stochastic differential equation}
\newacro{SB}{Schr\"odinger bridge}
\newacro{GSB}[GSB]{Gaussian Schr\"odinger bridge}


\newacro{SGM}{score-based generative model}

\newacro{SMLD}{score matching with Langevin dynamics}
\newacro{DDPM}{denoising diffusion probabilistic model}

\newacro{OU}{Ornstein\textendash Uhlenbeck}
\newacro{BM}{Brownian motion}
\newacro{BDT}{Black–Derman–Toy}



\newacro{VESDE}[VE SDE]{variance exploding \ac{SDE}}
\newacro{VPSDE}[VP SDE]{variance preserving \ac{SDE}}
\newacro{DSB}{diffusion Schr\"odinger bridge}
\newacro{IPF}{iterative proportional fitting}

\newmacro{\acroalg}{\textsc{GSBflow}}   % acronym for our overall algorithm
%\newacro{PSD}[p.s.d.]{positive definite}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Select the "plain" theorem style for subsequently declared theorem
% environments. NOTE(review): every \newtheorem declaration below is commented
% out, so within this part of the preamble the call has no visible effect --
% presumably the environments are declared elsewhere; verify.
\theoremstyle{plain}
%\newtheorem{theorem}{Theorem}[section]
%\newtheorem{proposition}[theorem]{Proposition}
%\newtheorem{lemma}[theorem]{Lemma}
%\newtheorem{corollary}[theorem]{Corollary}
%\theoremstyle{definition}
%\newtheorem{definition}[theorem]{Definition}
%\newtheorem{assumption}[theorem]{Assumption}
%\theoremstyle{remark}
%\newtheorem{remark}[theorem]{Remark}

% Todonotes is useful during development; simply uncomment the next line
%    and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}
% \usepackage[textsize=tiny]{todonotes}

% NOTE(review): the two comment lines that used to sit here referred to an
% \icmltitle running header; they were template leftovers and did not describe
% the macro below.
% \swap[<sep>]{<a>}{<b>} prints <b><sep><a>; the separator defaults to "-".
\newcommand{\swap}[3][-]{#3#1#2} % just an example

% Print footnote marks as symbols instead of numbers, so that
% \footnotetext[1]{...} in the body is set with the first footnote symbol.
\renewcommand{\thefootnote}{\fnsymbol{footnote}}

% Paper title.
\title{Aligned Diffusion Schr\"odinger Bridges}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
% The optional argument lists the affiliation indices declared with \affil
% below. The $^*$ marks flag the two authors referred to by the
% "Equal contribution." footnote text in the body.
\author[1,2]{Vignesh Ram Somnath$^*$}
\author[1,3]{Matteo Pariset$^*$}
\author[1]{Ya-Ping Hsieh}
% The leading \\ forces a line break in the author list before this name.
\author[2]{\\Maria Rodriguez Martinez}
\author[1]{Andreas Krause}
\author[1]{Charlotte Bunne}
% Add affiliations after the authors
\affil[1]{%
    Department of Computer Science\\
    ETH Z\"urich
}
\affil[2]{%
    IBM Research Z\"urich
}
\affil[3]{%
    Department of Computer Science\\
    EPFL
  }
  
\begin{document}
% Use the lmtt (Latin Modern Typewriter) family for monospaced text.
\renewcommand\ttdefault{lmtt}
\maketitle

\begin{abstract}
Diffusion Schr\"odinger bridges (\acsu{DSB}) have recently emerged as a powerful framework for recovering stochastic dynamics via their marginal observations at different time points. Despite numerous successful applications, existing algorithms for solving \acp{DSB} have so far failed to utilize the structure of \emph{aligned} data, which naturally arises in many biological phenomena. In this paper, we propose a novel algorithmic framework that, for the first time, solves \acp{DSB} while respecting the data alignment. Our approach hinges on a combination of two decades-old ideas: The classical Schr\"odinger bridge theory and Doob's \emph{$h$-transform}. Compared to prior methods, our approach leads to a simpler training procedure with lower variance, which we further augment with principled regularization schemes. This ultimately leads to sizeable improvements across experiments on synthetic and real data, including the tasks of predicting conformational changes in proteins and temporal evolution of cellular differentiation processes.
\end{abstract}

\footnotetext[1]{Equal contribution.}
\section{Introduction}
\label{sec:intro}
\looseness -1 The task of transforming a given distribution into another lies at the heart of many modern machine learning applications such as single-cell genomics \citep{tong2020trajectorynet, schiebinger2019optimal, bunne2022supervised}, meteorology \citep{fisher2009data}, and robotics \citep{chen2021optimal}. 
To this end, \aclp{DSB} \citep{de2021diffusion,chen2021likelihood,vargas2021solving,liu2022deep} have recently emerged as a powerful paradigm due to their ability to generalize prior deep diffusion-based models, notably \acl{SMLD}
\citep{song2019generative,song2020score} and \aclp{DDPM} \citep{ho2020denoising}, which have achieved the state-of-the-art on many generative modeling problems.

\begin{figure}[t]
    \centering
    \includegraphics[width=\linewidth]{figures/fig_overview_proteins.pdf}
    \caption{Overview of \textsc{SBalign}: In biological tasks such as protein docking, one is naturally provided with \emph{aligned} data in the form of unbound and bound structures of participating proteins. Our goal is therefore to recover a stochastic trajectory from the unbound ($\x_0$) to the bound ($\x_1$) structure. To achieve this, we connect the characterization of an SDE conditioned on $\x_0$ and $\x_1$ (utilizing Doob's \emph{$h$-transform}) with that of a Brownian bridge between $\x_0$ and $\x_1$ (classical Schr\"odinger bridge theory). We show that this leads to a simpler training procedure with lower variance and strong empirical results.}
    \label{fig:overview_proteins}
\end{figure}
%To this end, 
%diffusion-based models have achieved state-of-the-art in many of the above, in particular, the \acdefp{DSB} \citep{de2021diffusion,chen2021likelihood,vargas2021solving,bunne2022recovering}, a paradigm recently emerged and has attracted significant attention due to its ability to generalize prior diffusion-based generative models, notably \ac{SMLD}
%\citep{song2019generative,song2020score} and \acp{DDPM} \citep{ho2020denoising},

Despite the wide success of \acp{DSB} solvers, a significant limitation of existing frameworks is that they fail to capture the \emph{alignment} of data: If $\distinit, \distend$ are two (empirical) distributions between which we wish to interpolate, then a tacit assumption in the literature is that the dependence of $\distinit$ and $\distend$ is unknown and somehow has to be recovered. Such an assumption, however, ignores important scenarios where the data is \emph{aligned}, meaning that the samples from $\distinit$ and $\distend$ naturally come in pairs $(\x^i_0,\x^i_1)_{i=1}^{N}$, which is common in many biological phenomena. Proteins, for instance, undergo conformational changes upon interactions with other biomolecules (protein docking, see Fig.~\ref{fig:overview_proteins}). The goal is to model conformational changes by recovering a (stochastic) trajectory $\x_t$ based on the positions observed at two time points $\left(\x_0, \x_1\right)$. Failing to incorporate this alignment would mean that we completely ignore information on the correspondence between the initial and final points of the molecules, resulting in a much harder problem than necessary.
% Molecular dynamics, for instance, seeks to recover a (stochastic) trajectory $\x_t$ based on the positions observed at two time points $(\x_0,\x_1)$. Failing to incorporate this alignment would mean that we completely ignore information of the correspondence between initial and final points of a particular molecule, resulting in a much harder problem than necessary. Similar situations also
% arise in other biological processes such as conformational changes in proteins when interacting with other macromolecules (protein docking, see Fig.~\ref{fig:overview_proteins}). 
Beyond protein docking, the recent use of SBs has been motivated by an important task in molecular biology: Cells change their molecular profile throughout developmental processes \citep{schiebinger2019optimal,bunne2021jkonet} or in response to perturbations such as cancer drugs \citep{lotfollahi2019scgen,bunne2021learning}. As most measurement technologies are destructive assays, i.e., the same cell cannot be observed twice nor fully profiled over time, these methods aim at reconstructing cell dynamics from \emph{unpaired} snapshots.
Recent developments in molecular biology, however, aim at overcoming this technological limitation. For example, \citet{chen2022live} propose a transcriptome profiling approach that preserves cell viability. \citet{weinreb2020lineage} capture cell differentiation processes by clonally connecting cells and their progenitors through barcodes (see Appendix).
%then it is customary to assume that the relation of a sample $\x_0\sim\distinit$ is unknown to any sample $\x_1\sim\distend$. This implicit assumption, however, ignores the scenarios where the data naturally comes in pairs $(\x_0,\x_1)$, which is a common scenario


\looseness -1 Motivated by these observations, the goal of this paper is to propose a novel algorithmic framework for solving \acp{DSB} with (partially) \emph{aligned} data. Our approach is in stark contrast to existing works which, due to the lack of data alignment, all rely on some variants of \acdef{IPF} \citep{fortet1940resolution, kullback1968probability} and are thus prone to numerical instability. On the other hand, via a combination of the original theory of Schr\"odinger bridges \citep{schrodinger1931umkehrung,leonard2013survey} and the key notion of Doob's \emph{$h$-transform} \citep{doob1984classical, rogers2000diffusions}, we design a novel loss function that completely bypasses the \ac{IPF} procedure and can be trained with much lower variance.


To summarize, we make the following contributions:
\begin{itemize}[topsep=0pt]
\item To the best of our knowledge, we consider, for the first time, the problem of interpolation with \emph{aligned} data. We rigorously formulate the problem in the \ac{DSB} framework.

\item Based on the theory of Schr\"odinger bridges and $h$-transform, we derive a new loss function that, unlike prior work on \acp{DSB}, does not require an \ac{IPF}-like procedure to train. We also propose principled regularization schemes to further stabilize training.

\item We describe how interpolating aligned data can provide better reference processes for use in classical \acp{DSB}, paving the way to hybrid aligned/non-aligned \acp{SB}.

\item \looseness -1 We evaluate our proposed framework on both synthetic and real data. For experiments utilizing real data, we consider two tasks where such aligned data is naturally available. The first is the task of developmental processes in single-cell biology, and the second involves protein docking. For the protein docking task, a comprehensive treatment is elusive, owing to lack of appropriate datasets. Instead, we consider two associated subproblems: (i) modeling conformational changes between unbound and bound states of a protein, and (ii) rigid protein docking, i.e., identifying the best relative orientation. Our method demonstrates a considerable improvement over prior methods across various metrics, thereby substantiating the importance of taking the data alignment into account.  

% \item We illustrate the power of our method on both synthetic and real data. Our real experiment features conformation changes in protein docking, an important task in biology with numerous applications \citep{}.%, for which we provide the first diffusion-based method that takes the alignment of data into account. 
% ~Our results demonstrate a considerable improvement over prior methods, thereby substantiating the importance of taking the data alignment into account.
\end{itemize}

\para{Related work}
Solving \acp{DSB} has been a subject of significant interest in recent years and has given rise to a number of different algorithms \citep{de2021diffusion,chen2021likelihood,vargas2021solving,bunne2022recovering,liudeep2022}. However, all these previous approaches focus on \emph{unaligned} data, and therefore the methodologies all rely on \ac{IPF} and are hence drastically different from ours. In the experiments, we will demonstrate the importance of considering the alignment of data.

An important ingredient in our theory is Doob's $h$-transform, which has recently also been utilized by \citet{liu2023learning} to solve the problem of constrained diffusion. However, their fundamental motivation is different from ours. \citet{liu2023learning} focus on learning the drift of the diffusion model and the $h$-transform \emph{together}, whereas ours is to read off the drift \emph{from} the $h$-transform with the help of {\em aligned data}. Consequently, there is no overlap between the two algorithms and their intended applications. 

To the best of our knowledge, the concurrent work of \citet{tong2023conditional} is the only existing framework that can tackle aligned data, which, however, is not their original motivation. In the context of solving \acp{DSB}, their algorithm can be seen as learning a vector field that generates the correct \emph{marginal} probability \citep[cf.][Proposition 4.3]{tong2023conditional}. Importantly, this is different from our aim of finding the \emph{pathwise} optimal solution of \acp{DSB}: If $(\x^i_{0,\textup{test}})_{i=1}^m$ is a test data set for which we wish to predict their destinations, then the framework of \citet{tong2023conditional} can only ensure that the marginal distribution $(\x^i_{1,\textup{test}})_{i=1}^m$ is correct, whereas ours is capable of predicting that $\x^i_{1,\textup{test}}$ is precisely the destination of $\x^i_{0,\textup{test}}$ for each $i$. This latter property is highly desirable in tasks like ML-accelerated protein docking.

To solve aligned \ac{SB} problems, we rely on mixtures of diffusion processes. Like in \citet{peluchetti2023diffusion}, we construct them from pairings and define an associated training objective inspired by score-based modeling. However, we represent the learned drift as a sum of the solution to an SB problem ($b$) and a pairing-related term ($\nabla \log h$). We parametrize the second part of the drift with neural networks, unlike \citet{schauer2017guided} which use an auxiliary (simpler) process.


\section{Background}
\label{sec:background}
\para{Problem formulation}
Suppose that we are given access to i.i.d.\ \emph{aligned} data $(\x_0^i,\x_1^i)_{i=1}^N$, where the marginal distribution of $\x^i_0$'s is $\distinit$ and of $\x_1^i$'s is $\distend$. Typically, we view $\distinit$ as the empirical marginal distribution of a stochastic process at time $t= 0$, and $\distend$ as the empirical marginal observed at $t=\horizon$. The goal is to reconstruct the stochastic process $\Pmargin$ based on $(\x_0^i,\x_1^i)_{i=1}^N$, \ie to transform $\distinit$ into $\distend$.

Such a task is ubiquitous in biological applications. For instance, understanding how proteins dock to other biomolecules is of significant interest in biology and has become a topic of intense study in recent years \citep{ganea2022independent, tsaban2022harnessing, corso2022diffdock}. In the protein docking task, $\x_0^i$ represents the 3D structures of the unbound proteins, while $\x_1^i$ represents the 3D structure of the bound complex. Reconstructing a stochastic process that diffuses $\x_0^i$'s to $\x_1^i$'s is tantamount to recovering the energy landscape governing the docking process.  Similarly, in molecular dynamics simulations, we have access to trajectories $\left(\x_t^i\right)_{t \in [0, 1]}$, where $\x_0^i$ and $\x_1^i$ represent the initial and final positions of the $i$-th molecule respectively. Any learning algorithm using these simulations should be able to respect the provided alignment. 


\para{Diffusion Schr\"odinger bridges}

To solve the interpolation problem, in \cref{sec:Methods}, we will invoke the framework of \acp{DSB}, which are designed to solve interpolation problems with \emph{unaligned} data. More specifically, given two marginals $\distinit$ and $\distend$, the \ac{DSB} framework proceeds by first choosing a reference process $\refpro$ using prior knowledge, for instance a simple Brownian motion, and then solving the entropy-minimization problem over all stochastic processes $\Pmargin$:
\begin{equation}
\label{eq:SB}
\tag{SB}
\min_{ \substack{ \Pinit = \distinit, \; \Pend = \distend} } \KL{\Pmargin}{\refpro}.
\end{equation}

Despite the fact that many methods exist for solving \eqref{eq:SB} \citep{de2021diffusion,chen2021likelihood,vargas2021solving,bunne2022recovering}, none of these incorporate data \emph{alignment}. This can be seen by inspecting the objective \eqref{eq:SB}, in which the coupling information $(\x_0^i,\x_1^i)$ is completely lost as only its individual marginals $\distinit,\distend$ play a role therein. Unfortunately, it is well-known that tackling the marginals separately necessitates a forward-backward learning process known as the \acli{IPF} (IPF) procedure \citep{fortet1940resolution,kullback1968probability}, which constitutes the primary reason for high-variance training, thereby confronting \acp{DSB} with numerical and scalability issues. Our major contribution is therefore to devise the first algorithmic framework that solves the interpolation problem with aligned data \emph{without} resorting to IPF.

%The reason is because the \emph{alignment} of the data is completely lost in the objective \eqref{eq:SB}: The objective only sees the individual marginals $\distinit$ and $\distend$, but does not care about how they are coupled. Therefore, naively applying the \ac{DSB} framework to interpolating aligned data ignores the natural pairing information $(\x_0^i,\x_1^i)$. 




\section{Aligned Diffusion Schr\"odinger Bridges}
\label{sec:Methods}
% Notation local to this section.
% \fdrift: the time-dependent drift b_t that the method learns.
\newcommand{\fdrift}{b_t}
% \doob: Doob's h function h_t.
\newcommand{\doob}{h_t}
% \doobs: smoothed h function h_{t,\reg}. \reg is defined two lines below;
% this works because the macro body is only expanded when \doobs is used.
\newcommand{\doobs}{h_{t,\reg}}
% \Loss: loss symbol L.
\newcommand{\Loss}{L}
% \reg: regularization (smoothing) parameter \tau.
\newcommand{\reg}{\tau}
% \cvolat[<time>] typesets \beta_{<time>} (default \ctime); the text defines it
% as the time-integral of the squared diffusion coefficient.
\newcommand{\cvolatbase}{\beta}
\newcommand{\cvolat}[1][\ctime]{\cvolatbase_{#1}}


In this section, we derive a novel loss function for \acp{DSB} with aligned data by combining two classical notions: The theory of \aclp{SB} \citep{schrodinger1931umkehrung,leonard2013survey,chen2021stochastic} and Doob's $h$-transform \citep{doob1984classical, rogers2000diffusions}. We then describe how solutions to DSBs with aligned data can be leveraged in the context of classical DSBs.

\subsection{Learning aligned diffusion Schr\"odinger bridges}
\para{Static SB and aligned data}

Our starting point is the simple and classical observation that \eqref{eq:SB} is the continuous-time analogue of the \emph{entropic optimal transport}, also known as the \emph{static} \acl{SB} problem \citep{leonard2013survey,chen2021stochastic,Peyre2019computational}:
\begin{equation}
\label{eq:static-SB}
\Pssol \defeq \argmin_{ \substack{ \Pinit = \distinit, \; \Pend = \distend} } \KL{\mathbb{P}_{0,1}}{\refprobase_{0,1}}
\end{equation}where the minimization is over all \emph{couplings} of $\distinit$ and $\distend$, and $\refprobase_{0,1}$ is simply the joint distribution of $\refpro$ at $t=0,\horizon$. In other words, if we denote by $\Psol$ the stochastic process that minimizes \eqref{eq:SB}, then the joint distribution $\Psol[0,\horizon]$ necessarily coincides with the $\Pssol$ in \eqref{eq:static-SB}. Moreover, since in \acp{DSB}, the data is always assumed to arise from $\Psol$, we see that:
\begin{quote}
The \emph{aligned} data $(\x_0^i,\x_1^i)_{i=1}^N$ constitutes samples of $\Pssol$.
\end{quote}
This simple but crucial observation lies at the heart of all derivations to come. 

Our central idea is to represent $\Psol$ via two different, but equivalent, characterizations, both of which involve $\Pssol$: That of a \emph{mixture} of reference processes with pinned end points, and that of conditional \acdefp{SDE}.

%, both of which well-known \cite{leonard2013survey,chen2021stochastic}.

  
%i.i.d. data $(\x_0^i,\x_1^i)_{i=1}^N$, where $\x_0$ represents the two undocked proteins while $\x^i_1$ represents the docked version. Our purpose is to learn a dynamics that recovers full process from $\x_0^i$ to $\x_1^i$.

%The challenge of this problem is that the underlying dynamics has to obtain certain \emph{symmetric properties} as exemplified in ample prior works \cite{}. For docking, we shall adopt the most recent framework of DiffDock \citep{corso2022diffdock}, where intelligent ideas ultimately lead to a largely reduced space, such as $\R^3 (\text{translations}), \Sp^2(\text{rotations}), \T(\text{torsions})$, on which the aforementioned dynamics takes place.

%To our knowledge, 


%The most important assumption of our work is that the nature is simulating an \emph{entropy-minimization} problem for the dynamics. More specifically, we assume that the nature carries each $\x_0^i$ to $\x_1^i$ via the stochastic process:
%\begin{equation}
%\label{eq:SB}
%\tag{SB}
%\min_{ \substack{ \Pinit = \distinit, \; \Pend = \distend} } \KL{\Pmargin}{\refpro}
%\end{equation}
%where $\x_0^i \sim \distinit$ and $\x_1^i \sim \distend$ are the marginal distributions, and $\refpro$ is a reference process that obeys the very same symmetry we strive for. In practice, $\refpro$ is usually taken to be the Brownian motion on various spaces. The problem \eqref{eq:SB} is known as the \acdef{SB}, which has recently found numerous applications in machine learning \cite{chen2021likelihood, de2021diffusion, vargas2021solving, bunne2022recovering}.
%
%
%Formulated in this way, it is important to notice that the data $(\x_0^i,\x_1^i)_{i=1}^N$ are \emph{paired} samples from the solution to the so-called \emph{static} \acl{SB}:
%\begin{equation}
%\label{eq:static-SB}
%\Pssol \defeq \argmin_{ \substack{ \Pinit = \distinit, \; \Pend = \distend} } \KL{\mathbb{P}_{0,1}}{\refprobase_{0,1}}.
%\end{equation}
%The \emph{pairing} is introduced by the fact that the identity of atoms in individual proteins is preserved throughout the docking process. This feature distinguishes our paper in that existing works assume only access to the \emph{unpaired} samples. How to take advantage of this piece of additional information is the central theme throughout our work.



%Let us denote the solution to \eqref{eq:SB} by $\Psol$.


\para{$\Psol$ from $\Pssol$: $\refpro$ with pinned end points}

For illustration purposes, we will assume that the reference process $\refpro$ is a Brownian motion with diffusion coefficient $\volat$:\footnote{\looseness -1 Extension to more involved reference processes is conceptually straightforward but notationally clumsy. Furthermore, reference processes of the form \eqref{eq:gtWt} are dominant in practical applications \citep{song2020score, bunne2022recovering}, so we omit the general case. }
\begin{equation}
\label{eq:gtWt}
\dd \refpro = \volat \dWiener.
\end{equation}
In this case, it is well-known that $\refpro$ \emph{conditioned} to start at $\x_0$ and end at $\x_1$ can be written as another \ac{SDE} \citep{mansuy2008aspects, liu2023learning}:
\begin{equation}
\label{eq:BB}
\dd X_t = \volatsq[\ctime] \frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]} \dt + \volat\dWiener
\end{equation}
where $X_0 = \x_0$ and %, $\dWiener$ is itself a Brownian motion, and 
\begin{equation}
\cvolat\defeq \int_0^\ctime \volatsq[s] \dd s.
\end{equation}We call the processes in \eqref{eq:BB} the \emph{scaled Brownian bridges} as they generalize the classical Brownian bridge, which corresponds to the case of $\volat \equiv 1$.

The first characterization of $\Psol$ is then an immediate consequence of the following classical result in \acl{SB} theory: Draw a sample $(\x_0, \x_1) \sim \Pssol$ and connect them via \eqref{eq:BB}. The resulting path is a sample from $\Psol$ \citep{leonard2013survey, chen2021stochastic}. In other words, $\Psol$ is a \emph{mixture} of scaled Brownian bridges, with the mixing weight given by $\Pssol$.

%that an optimal path of $\Psol$ can be reconstructed by $\Pssol$ via scaled Brownian bridges: Draw a sample $(\x_0, \x_1) \sim \Pssol$ and connect them via \eqref{eq:BB}. The resulting path is a sample from $\Psol$ \citep{leonard2013survey, chen2021stochastic}. 

%More importantly, it is known that the Brownian bridges admit an \ac{SDE} representation \citep{mansuy2008aspects}:
%\begin{equation}
%\label{eq:BB}
%\dd X_t = \volatsq[\ctime] \frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]} \dt + \volat\dWiener
%\end{equation}
%where $X_0 = \x_0$, $\dWiener$ is itself a Brownian motion, and 
%\begin{equation}
%\cvolat\defeq \int_0^\ctime \volatsq \dd s.
%\end{equation}


\para{$\Psol$ from $\Pssol$: \ac{SDE} representation}



Another characterization of $\Psol$ is that it is itself given by an \ac{SDE} of the form \citep{leonard2013survey, chen2021stochastic}
\begin{equation}
\label{eq:SB-SDE}
\dd X_t = \volatsq[\ctime]\fdrift(X_t) \dt + \volat\dWiener.
\end{equation}
Here, $\fdrift: \R^d \to \R^d$ is a time-dependent drift function that we wish to learn.



Now, by Doob's $h$-transform, we know that the \ac{SDE} \eqref{eq:SB-SDE} \emph{conditioned} to start at $\x_0$ and end at $\x_1$ is given by another \ac{SDE} \citep{doob1984classical,rogers2000diffusions}:
\begin{equation}
\label{eq:SB-SD-conditioned}
\dd X_t = \volatsq[\ctime]\bracks*{\fdrift(X_t) + \nabla \log \doob(X_t) }\dt +\volat \dWiener
\end{equation}
where $\doob(\x) \defeq \prob(X_1 = \x_1\vert X_t = \x)$ is the \emph{Doob's $h$ function}. Notice that we have suppressed the dependence of $\doob$ on $\x_0$ and $\x_1$ for notational simplicity.%\footnote{The fact $\doob$ does not depend on the starting point $\x_0$ is easily understood: An \ac{SDE} remains the same no matter where it starts.}



\para{Loss function}

Since both \eqref{eq:BB} and \eqref{eq:SB-SD-conditioned} represent $\Psol$, the solution of the \acp{DSB}, the two \acp{SDE} must coincide. 
%This in turn implies that, for all $(\x_0,\x_1)\sim\Pssol$, we must have
%\begin{equation}
%\fdrift(X_t) + \nabla \log \doob(X_t) =  \frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]}.
%\end{equation}
~In other words, suppose we parametrize $\fdrift$ as $\fdrift^\theta$, then, by matching terms in \eqref{eq:BB} and \eqref{eq:SB-SD-conditioned}, we can learn the optimal parameter $\theta^\star$ via optimization of the loss function
\begin{equation}
\label{eq:loss}
\Loss(\theta) \defeq \exof*{\int_0^1 \norm*{ \frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]}-\nabla \log \doob^\theta(X_t)}^2 \dt  }
\end{equation}
where $\doob^\theta$ depends on $\fdrift^\theta$ as well as the drawn samples $(\x_0,\x_1)$. This is the case since $\doob$ is defined as an expectation using trajectories sampled under $\fdrift^\theta$ with given endpoints. Therefore, assuming that, for each $\theta$, we can compute $\doob^\theta$ \emph{based only on} $\fdrift^\theta$, we can then backprop through \eqref{eq:loss} and optimize it using any off-the-shelf algorithm.


\para{A slightly modified \eqref{eq:loss}}
Even with infinite data and a neural network with sufficient capacity, the loss function defined in \eqref{eq:loss} does not converge to 0. For the purpose of numerical stability, we instead propose to modify \eqref{eq:loss} to:
\begin{equation}
\label{eq:loss_modified}
\Loss(\theta) \defeq \exof*{\int_0^1 \norm*{\frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]}- \left(\fdrift^\theta + \nabla \log \doob^\theta(X_t)\right)}^2 \dt  }
\end{equation}which is clearly equivalent to \eqref{eq:loss} at the true solution of $\fdrift$. Notice that \eqref{eq:loss_modified} bears a similar form as the popular score-matching objective employed in previous works \citep{song2019generative,song2020score}:
\begin{equation}
\label{eq:score_matching}
\Loss(\theta) \defeq \exof*{\int_0^1 \norm*{\nabla \log p(\x_t | \x_0)- s^\theta(X_t, t)}^2 \dt  },
\end{equation}
where the term $\frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]}$ is akin to $\nabla \log p(\x_t | \x_0)$, while $\left(\fdrift^\theta + \nabla \log \doob^\theta(X_t)\right)$ corresponds to $s^\theta(X_t, t)$. 

% \begin{equation}
% \label{eq:loss}
% \Loss(\theta) \defeq \exof*{\int_0^1 \norm*{\x_1-\x_0 + \frac{X_t}{1-t}-(b_{\theta} + \nabla \log \doob^\theta(X_t)})^2\dt  }
% \end{equation}

 \begin{algorithm}[t]
   \caption{\textsc{SBalign}}
   \label{alg:SBalign}
\begin{algorithmic}
   \STATE {\bfseries Input:} Aligned data $(\x^i_0,\x^i_1)_{i=1}^N$, learning rates $\lrf,\lrb$, number of iterations $K$ %\dots  data $x_i$, size $m$
%   \STATE {\bfseries Output:} Optimal forward and backward drifts $\SBfbase_\ctime(\cdot), \SBbbase_\ctime(\cdot)$ for \eqref{eq:GSBflow}
%   \STATE {\bfseries Construct \eqref{eq:GaussianSB}:} Compute the means and covariances of $\distinit, \distend$.
%   \REPEAT
\smallskip
   \STATE Initialize $\paramf \subs \paramf_0$, $\paramb \subs \paramb_0$.
   \FOR{$k=1$ {\bfseries to} $K$} 
   \STATE Draw a mini-batch of samples from $(\x^i_0,\x^i_1)_{i=1}^N$
%   \FOR{$j=1$ {\bfseries to} $\inneriter$}   
%   \IF{$j \mod \caching = 0$}
   \STATE Compute empirical average of \eqref{eq:loss_final} with mini-batch.
%   \ENDIF
%   \STATE Compute $\lossb$ via \eqref{eq:loss-backward}
   \STATE Update $\paramb \subs \paramb - \lrb\nabla \Loss(\theta,\phi)$
%   \ENDFOR
%   \FOR{$j=1$ {\bfseries to} $\inneriter$}   
%   \IF{$j \mod \caching = 0$}
%   \STATE Simulate \eqref{eq:GSB-sde-backward} with $\point_\horizon \sim \distend$
%   \ENDIF
%   \STATE Compute $\lossf$ via \eqref{eq:loss-forward}
   \STATE Update $\paramf \subs \paramf - \lrf\nabla \Loss(\theta,\phi)$
   \ENDFOR
%   \ENDFOR
%   \UNTIL{$noChange$ is $true$}
\end{algorithmic}
\end{algorithm}

\para{Computing $\doob^\theta$}% via the Feymann-Kac Formula}

Inspecting $\doob$ in \eqref{eq:SB-SD-conditioned}, we see that, given $(\x_0,\x_1)$, it can be written as the conditional expectation of an indicator function:
\begin{equation}
\label{eq:h-semigroup}
\doob(\x) = \prob(X_1 = \x_1\vert X_t = \x) = \exof*{\one_{\{\x_1\}}\vert X_t = \x}
\end{equation}where the expectation is over \eqref{eq:SB-SDE}. Functions of the form \eqref{eq:h-semigroup} lend themselves well to computation, since they can be estimated by simulating the \emph{unconditioned} paths.
%since the Feymann-Kac formula, which offers an representation via simulating the \emph{unconditional} paths and has been well-studied in fields such as physics \cite{}, financial engineering \citep{}, and machine learning \citep{}. 
~Furthermore, in order to avoid overfitting on the given samples, it is customary to replace the ``hard'' constraint $\one_{\{\x_1\}}$ by its \emph{smoothed} version \citep{zhang2021path, holdijk2022path}: 
\begin{equation}
\label{eq:softdoob}
\doobs(\x) \defeq \exof*{\exp\parens*{-\frac{1}{2\reg}\norm{X_\horizon-\x_1}^2 }  \vert X_t = \x}.
\end{equation}Here, $\reg$ is a regularization parameter that controls how much we ``soften'' the constraint, and we have $\lim_{\reg\to 0} \doobs = \doob$.


Although the computation of \eqref{eq:softdoob} can be done via a standard application of the Feynman–Kac formula \citep{rogers2000diffusions}, an altogether easier approach is to parametrize $\doobs$ by a second neural network $m^{\phi}$ and perform alternating minimization steps on $\fdrift^\theta$ and $m^{\phi}$. This choice reduces the variance in training, since it avoids the sampling of unconditional paths described by \eqref{eq:SB-SDE} (see Appendix for a detailed explanation).

%To summarize, we have
%%Combining the above, we shall adopt the Feymann-Kac formula for computing $\doobs$:
%\begin{align}
%\label{eq:logdoobs}
%&\nabla \log \doobs(\x) \\ 
%\nn
%&\hspace{5mm}=\lim_{\ctimealt \searrow \ctime} \frac{\mathbb{E}_{\textup{\eqref{eq:SB-SDE}}} \bracks*{  \exp\parens*{-\frac{1}{2\reg}\norm{X_\horizon-\x_1}^2 } \int_\ctime^{\ctimealt} \volat[s]\dWiener \vert X_t = \x  }  }{(\ctimealt-\ctime) \mathbb{E}_{\textup{\eqref{eq:SB-SDE}}} \bracks*{ \exp\parens*{-\frac{1}{2\reg}\norm{X_\horizon-\x_1}^2 }  \vert X_t = \x} }.
%\end{align}
%The generalization of \eqref{eq:logdoobs} to the case of $\fdrift^\theta$ is straightforward. It remains to neural net everything.

\para{Regularization}
Since it is well-known that $\nabla \log\doob$ typically explodes when $\ctime\to 1$ \citep{liu2023learning}, it is important to regularize the behavior of $m^{\phi}$ for numerical stability, especially when $\ctime\to 1$. Moreover, in practice, it is desirable to learn a drift $\fdrift^\theta$ that respects the data alignment \emph{in expectation}: If $(\x_0,\x_1)$ is an input pair, then multiple runs of the \ac{SDE} \eqref{eq:SB-SDE} starting from $\x_0$ should, on average, produce samples that are in the proximity of $\x_1$. This observation implies that we should search for drifts whose corresponding $h$-transforms are diminishing.

A simple way to simultaneously achieve the above two requirements is to add an $\ell^2$-regularization term, resulting in the loss function:
%\eqref{eq:logdoobs} is computationally expensive to simulate, because of repeated function calls to to $\fdrift^\theta$, and also suffers from high variance especially in the initial stages of training. 
%Note $\nabla \log \doobs(\x_1) = 0$. We replace $\nabla \log \doobs(\x)$ with a neural network $m^{\phi}$ such that $\norm{m^{\phi}(\x_1)}$ is minimized. The final loss function looks like
\begin{align}
\label{eq:loss_final}
\Loss(\theta,\phi) &\defeq \mathbb{E} \Bigg[\int_0^1 \norm*{\frac{\x_1-X_t}{\cvolat[\horizon]-\cvolat[\ctime]}- \left(\fdrift^\theta + m^{\phi}(X_t)\right)}^2
\\ &\hspace{35mm}+ \lambda_t \norm{m^{\phi}(X_t)}^2 \dt \Bigg]
\nonumber
\end{align}where $\lambda_t$ can either be constant or vary with time. The overall algorithm is depicted in \cref{alg:SBalign}.


\subsection{Paired Schr\"odinger bridges as prior processes}
\label{subsec:prior_drift}
% Classical SBs are unsuitable in cases where the alignments are known, because they only consider samples from $\distinit$ and $\distend$ and disregard those drawn from the (optimal) coupling $\pi^\star$. However, the reliance of our method on this crucial knowledge is critical to avoid the necessity of IPF-like iterates but may become a limitation when insufficient information on alignments is available. 

% In such a situation, while it is unrealistic to hope for an accurate solution to the aligned SB problem, the interpolation between $\distinit$ and $\distend$ learned by \textsc{SBalign} (\ref{eq:SB-SDE}) can potentially still be leveraged to obtain a better reference process, when solving a classical SB on the same marginals ---i.e. the term $b_t(X_t)$ learned via \textsc{SBalign} can, in fact, be used \textit{as is} to construct a data-informed alternative $\Tilde{\refpro}$ to the standard Brownian motion (\ref{eq:gtWt}).

% Improved reference processes, either using pre-trained or data-informed ones, have been previously considered in the literature. 
% For instance, both \citet{de2021diffusion} and \citet{chen2021likelihood} use a pre-trained reference process for challenging image interpolation tasks. This approach, however, relies on DSBs trained using the classical score-based generative modeling objective between a Gaussian and the data distribution. It therefore pre-trains the reference process on a related ---but different--- process, i.e., the one mapping Gaussian noise to data rather than $\distinit$ to $\distend$.
% An alternative, proposed by \citet{bunne2022recovering}, draws on the closed-form solution of SBs between two Gaussian distributions, which are chosen to approximate $\distinit$ and $\distend$, respectively.
% Unlike our method, these alternatives construct better prior drifts by falling back to simpler and related tasks, or approximations of the original problem. We instead propose to shape a coarse-grained description of the drift based on alignments sampled directly from $\mathbb{P}_{0,1}$. 
Our algorithm finds solutions to SBs on aligned data by relying on samples drawn from the (optimal) coupling $\pi^\star$. This is what differentiates it from classical SBs ---which instead only consider samples from $\hat{\mathbb{P}}_0$ and $\hat{\mathbb{P}}_1$--- and plays a critical role in avoiding IPF-like iterates. However, \textsc{SBalign}'s reliance on samples from $\pi^\star$ may become a limitation when the available information on alignments is insufficient.

If the number of pairings is limited,  it is unrealistic to hope for an accurate solution to the aligned SB problem. However, the interpolation between $\hat{\mathbb{P}}_0$ and $\hat{\mathbb{P}}_1$ learned by \textsc{SBalign} can potentially be leveraged as a starting point to obtain a better reference process, which can then be used when solving a classical SB on the same marginals. In other words, the drift $b^\text{aligned}_t(X_t)$ learned through \textsc{SBalign} can be used \textit{as is} to construct a data-informed alternative $\tilde{\mathbb{Q}}$ to the standard Brownian motion, defined by paths:
\[
    d\tilde{X}_t = b^\text{aligned}_t(\tilde{X}_t) dt + g_t dW_t.
\]
Intuitively, solving a standard SB problem with $\tilde{\mathbb{Q}}$ as reference is beneficial because the (imperfect) coupling of marginals learned by \textsc{SBalign} ($\tilde{\mathbb{Q}}_{01}$) is, in general, closer to the truth than $\mathbb{Q}_{01}$.

Improving reference processes through pre-training or data-dependent initialization has been previously considered in the literature. For instance, both \citet{de2021diffusion} and \citet{chen2021likelihood} use a pre-trained reference process for challenging image interpolation tasks. This approach, however, relies on DSBs trained using the classical score-based generative modeling objective between a Gaussian and the data distribution. It, therefore, pre-trains the reference process on a related ---but different--- process, i.e., the one mapping Gaussian noise to data rather than $\hat{\mathbb{P}}_0$ to $\hat{\mathbb{P}}_1$.
An alternative, proposed by \citet{bunne2022recovering}, draws on the closed-form solution of SBs between two Gaussian distributions, which are chosen to approximate $\hat{\mathbb{P}}_0$ and $\hat{\mathbb{P}}_1$, respectively.
Unlike our method, these alternatives construct prior drifts by falling back to simpler and related tasks, or approximations of the original problem. We instead propose to shape a coarse-grained description of the drift based on alignments sampled directly from $\pi^\star_{01}$. 

\section{Experiments}
\label{sec:experiments}
In this section, we evaluate \textsc{SBalign} in different settings involving 2-dimensional synthetic datasets, the task of reconstructing cellular differentiation processes, as well as predicting the conformation of a protein structure and its ligand, formalized as a rigid protein docking problem.

\subsection{Synthetic Experiments}
\label{sec:synthetic}

\begin{figure*}[ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_results_synthetic.jpg}
    \caption{Experimental results on the Moon dataset (\textbf{a-c}) and T-dataset (\textbf{d-f}). The top row shows the trajectory sampled using the learned drift, and the bottom row shows the matching based on the learnt drift. Compared to other baselines, \textsc{SBalign} is able to learn an appropriate drift respecting the true alignment. (\textbf{f}) further showcases the utility of \textsc{SBalign}'s learnt drift as a suitable reference process to improve other training methods.}
    \label{fig:results_spiral}
\end{figure*}

\begin{figure*}[ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_cell_trajectories_matchings.jpg}
    \caption{Cell differentiation trajectories based on (\textbf{a}) the ground truth and (\textbf{b-d}) learned drifts. \textsc{SBalign} is able to learn an appropriate drift underlying the true differentiation process while respecting the alignment. (\textbf{d}) Using the learned drift from \textsc{SBalign} as a reference process helps improve the drift learned by other training methods.}
    \label{fig:results_cell_traj}
\end{figure*}

We run our algorithm on two synthetic datasets (Figures in Appendix), and compare the results with classic diffusion Schr\"odinger bridge models, i.e., the forward-backward SB formulation proposed by \cite{chen2021likelihood}, herein referred to as \textsc{fbSB}. We equip the baseline with prior knowledge, as elaborated below, to further challenge \textsc{SBalign}.

\para{Moon dataset}
The first synthetic dataset (Fig.~\ref{fig:results_spiral}a-c) consists of two distributions, each supported on two semi-circles ($\distinit$ drawn in \textit{blue} and $\distend$ in \textit{red}).
$\distend$ was obtained from $\distinit$ by applying a clockwise rotation around the center, i.e., by making points in the upper blue arm correspond to those in the right red one.
This transformation is clearly not the most likely one under the assumption of Brownian motion of particles and should therefore not be found as the solution of a classical SB problem. 
This is confirmed by \textsc{fbSB} trajectories (Fig.~\ref{fig:results_spiral}a), which tend to map points to their closest neighbor in $\distend$ (e.g., some points in the upper arm of $\distinit$ are brought towards the left rather than towards the right). 
While being a minimizer of \eqref{eq:SB}, such a solution completely disregards our prior knowledge on the alignment of particles, which is instead reliably reproduced by the dynamics learned by \textsc{SBalign} (Fig.~\ref{fig:results_spiral}c).

One way of encoding this additional information on the nature of the process is to modify $\refpro$ by introducing a clockwise rotational drift, which describes the prior tangential velocity of particles moving circularly around the center.
Solving the classical SB with this updated reference process indeed generates trajectories that respect most alignments (Fig.~\ref{fig:results_spiral}b), but requires a hand-crafted expression of the drift that is only possible in very simple cases.

\para{T dataset}
In most real-world applications, it is very difficult to define an appropriate reference process $\refpro$, which respects the known alignment without excessively distorting the trajectories from a solution to \eqref{eq:SB}. This is already visible in simple examples like the one in Fig.~\ref{fig:results_spiral}d-f, in which the value of good candidate prior drifts at a specific location needs to vary wildly in time.
In this dataset, $\distinit$ and $\distend$ are both bi-modal distributions, each supported on two of the four extremes of an imaginary T-shaped area.
We target alignments that connect the two arms of the T as well as the top cloud with the bottom one. We succeed in learning them with \textsc{SBalign} (Fig.~\ref{fig:results_spiral}e) but unsurprisingly fail when using the baseline \textsc{fbSB} (Fig.~\ref{fig:results_spiral}d) with a Brownian motion prior.

\looseness -1 In this case, however, attempts at designing a better reference drift for \textsc{fbSB} must take into account the additional constraint that the horizontal and vertical particle trajectories intersect (see Fig.~\ref{fig:results_spiral}e), i.e., they cross the same area at times $t_h$ and $t_v$ (with $t_h > t_v$). This implies that the drift $b_t$, which initially points downwards (when $t < t_v$), should swiftly turn rightwards (for $t > t_h$).
Setting imprecise values for one of $t_h$ and $t_v$ when defining custom reference drifts for classical SBs would hence not lead to the desired result and, worse, would actively disturb the flow of the other particle group.

\looseness -1 As described in \S~\ref{subsec:prior_drift}, in presence of hard-to-capture requirements on the reference drift, the use of \textsc{SBalign} offers a remarkably easy and efficient way of learning a parameterization of it. For instance, when using the drift obtained by \textsc{SBalign} as reference drift for the computation of the SB baseline (\textsc{fbSB}), we find the desired alignments (Fig.~\ref{fig:results_spiral}f).

\begin{figure*}[ht]
    \centering
    \includegraphics[width=\textwidth]{figures/fig_cell_pred_types.pdf}
    \caption{Cell type prediction on the differentiation dataset. All distributions are plotted on the first two principal components. \textbf{a-b:} Ground truth cell types on day 2 and day 4 respectively. \textbf{c-d:} \textsc{fbSB} and \textsc{SBalign} cell type predictions on day 4. \textsc{SBalign} is able to better model the underlying differentiation processes and capture the diversity in cell types.}
    \label{fig:results_cell_class}
\end{figure*}

\subsection{Cell Differentiation}
\label{sec:cell}

\looseness -1 Biological processes are determined through heterogeneous responses of single cells to external stimuli, e.g., developmental factors or drugs. Understanding and predicting the dynamics of single cells subject to a stimulus is thus crucial to enhancing our understanding of health and disease, and is the focus of this task.
Most single-cell high-throughput technologies are destructive assays ---i.e., they destroy cells upon measurement--- allowing us to only measure \textit{unaligned} snapshots of the evolving cell population. Recent methods address this limitation by proposing (lower-throughput) technologies that keep cells alive after transcriptome profiling \citep{chen2022live} or that genetically tag cells to obtain a clonal trace upon cell division \citep{weinreb2020lineage}.

\para{Dataset} To showcase \textsc{SBalign}'s ability to make use of such (partial) alignments when inferring cell differentiation processes, we take advantage of the genetic barcoding system developed by \citet{weinreb2020lineage}. With a focus on fate determination in hematopoiesis, \citet{weinreb2020lineage} use expressed DNA barcodes to clonally trace single-cell transcriptomes over time. The dataset consists of two snapshots: the first, recorded on day 2, when most cells are still undifferentiated (see Fig.~\ref{fig:results_cell_class}a), and a second, on day 4, comprising many different mature cell types (see Fig.~\ref{fig:results_cell_class}b). Using \textsc{SBalign} as well as the baseline \textsc{fbSB}, we attempt to reconstruct cell evolution between day 2 and day 4, all while capturing the heterogeneity of emerging cell types. More details on the dataset can be found in the Appendix.


\para{Baselines} \looseness -1 We benchmark \textsc{SBalign} against previous \acp{DSB} such as \textsc{fbSB} \citep{chen2021likelihood} and also use it to learn a prior reference process. Cell division processes and subsequently the propagation of the barcodes are naturally very noisy. While this genetic annotation provides some form of assignment, it does not capture the full developmental process. We thus test \textsc{SBalign} in a setting where it learns a prior from such partial alignments and, plugged into \textsc{fbSB}, is fine-tuned on the full dataset.
 
\para{Evaluation metrics} To assess the performance of \textsc{SBalign} and the baselines, we monitor several metrics, which include distributional distances, i.e., MMD~\citep{gretton2012kernel} and $\text{W}_{\epsilon}$~\citep{cuturi2013sinkhorn}, as well as averaged scores, i.e., the perturbation score $\ell_2(\text{PS})$ \citep{bunne2022supervised} and RMSD. Moreover, we also train a simple neural network-based classifier to annotate the cell type on day 4 and we report the accuracy of the predicted vs. actual cell type for all the models (more details in the Appendix).

\looseness -1 \para{Results} \textsc{SBalign} not only finds matchings between cell states on days 2 and 4 (Fig.~\ref{fig:results_cell_traj}c, bottom) which resemble the observed ones (Fig.~\ref{fig:results_cell_traj}a) but also reconstructs the entire evolution path of transcriptomic profiles (Fig.~\ref{fig:results_cell_traj}c, top).
It outperforms the baseline \textsc{fbSB} (Tab.~\ref{tab:results_cells}) in all metrics: Remarkably, our method exceeds the performance of the baseline on distributional metrics as well, and not only on alignment-based ones.
We also leverage \textsc{SBalign} predictions to recover the type of cells at the end of the differentiation process (Fig.~\ref{fig:results_cell_class}d): We train a classifier on differentiated cells observed on day 4, and subsequently classify our predictions. % and applying it to cell statuses predicted by our method.
While capturing the overall differentiation trend, \textsc{SBalign} (as well as \textsc{fbSB}) struggles to isolate rare cell types.
Lastly, we employ \textsc{SBalign} to learn a prior process from noisy alignments based on genetic barcode annotations. When using this reference process within \textsc{fbSB}, we learn an SB which compensates for inaccuracies stemming from the stochastic nature of cell division and barcode redistribution and which achieves better scores on distributional metrics (Tab.~\ref{tab:results_cells}). Additional results can be found in the Appendix.

\begin{table}[ht]
    \caption{\textbf{Cell differentiation prediction results.} Means and standard deviations (in parentheses) of distributional metrics (MMD, $\text{W}_{\epsilon}$), alignment-based metrics ($\ell_2$, RMSD), and cell type classification accuracy.}
    \label{tab:results_cells}
     \centering
    \adjustbox{max width=\linewidth}{%
    \begin{tabular}{lccccc}
    \toprule
     & \multicolumn{5}{c}{\textbf{Cell Differentiation}} \\
    \cmidrule(lr){2-6}
    \textbf{Methods} & MMD $\downarrow$ & $\text{W}_\epsilon \downarrow$ & $\ell_2(\text{PS}) \downarrow$ & RMSD $\downarrow$ & Class. Acc. $\uparrow$ \\
    \midrule
     \textsc{fbSB}& \makecell{1.55e-2\\(0.03e-2)} & \makecell{12.50\\(0.04)} & \makecell{4.08\\(0.04)} & \makecell{9.64e-1\\(0.02e-1)} & \makecell{56.2\%\\(0.7\%)} \\
     \makecell{\textsc{fbSB} with\\\textsc{SBalign}} & \makecell{5.31e-3\\(0.25e-3)} & \makecell{10.54\\(0.08)} & \makecell{0.99\\(0.12)} & \makecell{9.85e-1\\(0.07e-1)} & \makecell{47.0\%\\(1.5\%)} \\
     \textsc{\bf{\textsc{SBalign}}}& \makecell{1.07e-2\\(0.01e-2)} & \makecell{11.11\\(0.02)} & \makecell{1.24\\(0.02)} & \makecell{9.21e-1\\(0.01e-1)} & \makecell{56.3\%\\(0.7\%)} \\
     \bottomrule
    \end{tabular}
}
\end{table}

\subsection{Protein Docking}

Proteins are dynamic, flexible biomolecules that form complexes upon interaction with other biomolecules. This is a central step in many biological processes, namely signal transduction, DNA replication, and repair. The formation of complexes is guided by appropriate energetics, best orienting the participating proteins relative to each other, along with a dynamic alteration in structure (conformational changes). Modelling this process is thus a central problem in biology and could allow one to engineer protein interactions for desired responses. In ({\em computational}) protein docking, the goal is to predict the 3D structure of the bound (docked) state of a protein pair, given the unbound states of the corresponding proteins. These proteins are denoted (arbitrarily) as the ligand and receptor respectively.

A comprehensive treatment of the protein docking problem is still elusive, owing to the lack of high-quality large datasets comprising 3D structures of participating proteins in the unbound and bound states. We tackle, instead, two related subproblems: (i) prediction of conformational changes between unbound and bound states of proteins and (ii) identification of the best orientation between interacting proteins, modeled as rigid bodies. This separation into related subproblems was also adopted by \citet{dominguez2003haddock}, one of the earliest works on the full protein docking problem.

\subsubsection{Conformational Changes in Proteins}
In this task, we are interested in predicting the 3D structure of the bound state of a protein, given the 3D structure in the unbound state. While it is possible to frame this problem as a ({\em conditional}) point cloud translation, an approach using Schr\"odinger bridges is more natural since it leverages the flexibility of proteins and accounts for the underlying stochasticity in the conformational change process.

\para{Dataset} The task of modeling conformational changes starting from a given protein structure is largely unexplored, mainly due to the lack of high-quality large datasets. Here we utilize the recently proposed D3PM dataset \citep{peng2022d3pm} that provides protein structures before ({\em apo}) and after ({\em holo}) binding, covering various types of protein motions. We generate samples by collecting Protein Data Bank (PDB) entries containing the same protein bound to different biomolecules and applying additional quality-control criteria. We only focus on protein pairs where the provided Root Mean Square Deviation (RMSD) of the C$\alpha$ carbon atoms between unbound and bound 3D structures is $>3.0$\r{A}, which amounts to 2370 examples in the D3PM dataset.

For each pair of structures, we first identify common residues and compute the RMSD between C$\alpha$ carbon atoms of the common residues after superposition using the Kabsch algorithm \citep{kabsch1976solution}. The pair is accepted only if the relative error between the computed and provided C$\alpha$ RMSD is less than 0.1. The rationale here is to only retain examples where we can reconstruct the provided RMSD values. The resulting dataset has 1591 examples, which is then divided into a train/valid/test split of 1291/150/150 examples respectively (see Appendix).

\para{Baselines} Since the goal of the task is to predict 3D structures, our model must satisfy the relevant SE(3) symmetries of rotations and translations. To this end, we evaluate \textsc{SBalign} against the \textsc{EGNN} model \citep{satorras2021n}, which satisfies the SE(3) symmetries and is a popular architecture used in many point-cloud transformation tasks \citep{satorras2021n, hoogeboom2022equivariant}.

\begin{table}[ht]
    \caption{\textbf{Conformational changes results.} RMSD between predicted and true structures in the bound state. The first term in parentheses refers to the number of poses sampled, and the second term to the number of simulation steps.
    }
    \label{tab:results_conf_changes}
    \centering
    \adjustbox{max width=\linewidth}{%
    \begin{tabular}{lccc|ccc}
    \toprule
     & \multicolumn{6}{c}{\textbf{D3PM Test Set}} \\
     & \multicolumn{3}{c}{RMSD (\r{A})} & \multicolumn{3}{c}{\% RMSD (\r{A}) $< \tau$}  \\
    \cmidrule(lr){2-7}
    \textbf{Methods} & Median & Mean & Std & $\tau = 2$ & $\tau=5$ & $\tau=10$\\
    \midrule
     \textsc{\textsc{EGNN}}& 19.99 & 21.37 & 8.21 & 1\% & 1\% & 3\% \\
     \textsc{\bf{\textsc{SBalign}}} (10, 10) & 3.80 & 4.98 & 3.95 & 0\% & 69\% & 93\%\\
     \textsc{\bf{\textsc{SBalign}}} (10, 100) & 3.81 & 5.02 & 3.96 & 0\% & 70\% & 93\%\\
     \bottomrule
    \end{tabular}
}
\end{table}

\para{Results} 
To evaluate our model, we report (Tab.~\ref{tab:results_conf_changes}) summary statistics of the RMSD between the C$\alpha$ carbon atoms of the predicted structure and the ground truth, and the fraction of predictions with RMSD values $<2.0, 5.0$ and $10.0$\r{A}.
\textsc{SBalign} outperforms \textsc{EGNN} by a large margin and is able to predict almost 70\% of examples with an RMSD $<5$\r{A}. One of the drawbacks attributed to diffusion models is their slow sampling speed, owing to multiple function calls to a neural network. Remarkably, our model is able to achieve impressive performance with just 10 steps of simulation. We leave it to future work to explore the tradeoff between sampling speed and quality of the predicted conformations.

\begin{figure*}[ht]
    \centering
    \includegraphics[width=.9\textwidth]{figures/fig_pred_1QA9.pdf}
    \caption{Ground truth and predicted bound structures for the complex with PDB ID: 1QA9. In contrast to \textsc{EquiDock}, \textsc{SBalign} is able to find the true binding interface.}
    \label{fig:results_docked_1QA9}
\end{figure*}

\subsubsection{Rigid Protein Docking}

In this task, we want to identify the best relative orientation between the two proteins, modeled as rigid bodies.

\para{Experimental setup} Our setup follows a similar convention as \textsc{EquiDock} \citep{ganea2022independent}. To summarize, the unbound structure of the ligand is derived by applying a random rotation and translation to the corresponding bound structure, while the receptor is held fixed w.l.o.g. Applying a different rotation and translation to each ligand can however result in a different Brownian bridge for each complex, resulting in limited meaningful signal for learning $\fdrift^\theta$. To avoid this, we sample a rotation and translation at the start of training and apply the same rotation and translation to all complexes across training, validation, and testing (more details in the Appendix).

\para{Dataset} We evaluate our method on the DB5.5 dataset \citep{vreven2015updates} which is a standard choice for protein-protein docking but contains only 253 complexes. We use the same splits as \textsc{EquiDock} \citep{ganea2022independent} ---containing 203/25/25 complexes in the training, validation, and test sets, respectively---
and show the results in Tab.~\ref{tab:results_docking}. For ligands in the test set, we generate the corresponding unbound versions by applying the rotation and translation sampled during training. We compare our method to \textsc{EquiDock} as well as to traditional docking software (see Appendix for details).

\begin{table}
    \caption{\textbf{Rigid docking results.} Complex and interface RMSD between predicted and true bound structures (after Kabsch alignment). Comparison with values reported in \citep{ganea2022independent} can be found in the Appendix.
    }
    \label{tab:results_docking}
    \centering
    \adjustbox{max width=\linewidth}{%
    \begin{tabular}{lcccccc}
    \toprule
     & \multicolumn{6}{c}{\textbf{DB5.5 Test Set}} \\
     & \multicolumn{3}{c}{Complex RMSD} & \multicolumn{3}{c}{Interface RMSD}  \\
    \cmidrule(lr){2-7}
    \textbf{Methods} & Median & Mean & Std & Median & Mean & Std\\
    \midrule
     % \textsc{Attract}$^*$ & 9.55 & 10.09 & 9.88 & 7.48 & 10.69 & 10.90 \\
     % \textsc{HDock}$^*$ & 0.30 & 5.34 & 12.04 & 0.24 & 4.76 & 10.83 \\
     % \textsc{ClusPro}$^*$& 3.38 & 8.25 & 7.92 & 2.31 & 8.71 & 9.89 \\
     % \textsc{PatchDock}$^*$ &  18.26 & 18.00 & 10.12 & 18.88 & 18.75 &  10.06 \\
     % \textsc{\textsc{EquiDock}}$^*$&  14.13 & 14.72  & 5.31 &  11.97 & 13.23 & 4.93  \\
     % \cmidrule(lr){1-7}
     \textsc{\textsc{EquiDock}}& 14.12 & 14.73 & 5.31 & 11.97 & 13.23 & 4.93 \\
     \textsc{\bf{\textsc{SBalign}}}& 6.59 & 6.69 & 2.04 & 7.69 & 8.11 & 2.39 \\
     \bottomrule
    \end{tabular}
}
\end{table}

\para{Evaluation metrics} We report two metrics, Complex RMSD and Interface RMSD. Following \citet{ganea2022independent}, we first superimpose the ground truth and the predicted complex structures using the Kabsch algorithm \citep{kabsch1976solution}, and then calculate Complex RMSD. A similar procedure is used for computing Interface RMSD, but only using the residues from the two proteins that are within $8\,$\r{A} of each other (see Appendix for more details).

\para{Results} \textsc{SBalign} considerably outperforms \textsc{EquiDock} across all metrics (Tab.~\ref{tab:results_docking}). An example of docked structures, in direct comparison with \textsc{EquiDock}, is displayed in Fig.~\ref{fig:results_docked_1QA9}, with more visualizations and results in the Appendix.
% \ref{fig:results_docked_1NW9}, and \ref{fig:results_docked_1JIW}.

\para{Future outlook} In this section, we presented a proof-of-concept application of \textsc{SBalign} for the subproblems associated with the protein docking task. While \textsc{SBalign} provides a principled method to model conformational changes, our setup for rigid protein docking is limited by utilizing the same rotation and translation across training and testing. A combination of \textsc{SBalign} for conformational change modeling with more recent methods for rigid-protein docking \citep{ketata2023diffdock} could provide a complete solution for the protein docking task, which we leave to future work.

\section{Conclusion}
\label{sec:conclusion}
In this paper, we propose a new framework to tackle the interpolation task with aligned data via \aclp{DSB}. Our central contribution is a novel algorithmic framework derived from the Schr\"odinger bridge theory and Doob's $h$-transform.
Via a combination of the two notions, we derive novel loss functions which, unlike all prior methods for solving \aclp{DSB}, do not rely on the \acl{IPF} procedure and are hence numerically stable. We verify our proposed algorithm on various synthetic and real-world tasks and demonstrate noticeable improvement over the previous state-of-the-art, thereby substantiating the claim that data alignment is a highly relevant feature that warrants further research. 

\begin{acknowledgements}
This publication was supported by the NCCR Catalysis (grant number 180544), a National Centre of Competence in Research funded by the Swiss National Science Foundation as well as the European Union’s Horizon 2020 research and innovation programme 826121. We thank Caroline Uhler for introducing us to the dataset by \citet{weinreb2020lineage}, which was instrumental in this research.
\end{acknowledgements}

\bibliography{somnath_658}


% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% % APPENDIX
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \clearpage
% \numberwithin{equation}{section}		% for numbering  in the appendix
% \numberwithin{lemma}{section}		% for numbering  in the appendix
% \numberwithin{proposition}{section}		% for numbering  in the appendix
% \numberwithin{theorem}{section}		% for numbering in the appendix
% %----------------------------------------------------------------------
% %%% PROOF OF GAUSSIAN SB
% %----------------------------------------------------------------------
% %\section{Proof of the Closed-Form Solutions for Gaussian Schr\"odinger Bridges}
% %\label{app:GaussianSB}
% %\input{appendix/appendix_gsb.tex}
% \input{appendix/appendix.tex}

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\end{document}