%\documentclass{uai2022} % for initial submission
 \documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

% customized package and commands
%\usepackage{parskip}
%\usepackage[utf8]{inputenc}
%\usepackage{amsthm, amsmath, amssymb}
%\usepackage{amsfonts}
%\usepackage{mathtools}

%\usepackage{dsfont}
%\usepackage[toc,page]{appendix}
%\usepackage{xcolor}
%\usepackage{graphicx}
%\usepackage{algorithm}
%\usepackage{algpseudocode}
%\usepackage{accents}
%\usepackage{ulem} 
%\usepackage{hyperref}
%\renewcommand{\arraystretch}{1.3}

%\allowdisplaybreaks

%\newtheorem{lemma}{Lemma}
%\newtheorem{theorem}{Theorem}
%\newtheorem{proposition}{Proposition}
%\newtheorem{corollary}{Corollary}
%\newtheorem{assumption}{Assumption}
%\newtheorem*{remark}{Remark}
%\newtheorem{definition}{Definition}[section]

%\newcommand{\Ocal}{\mathcal{O}}


%\renewcommand{\qed}{\phantom{z} \hfill$\blacksquare$}


%\makeatletter
%\newtheorem*{rep@theorem}{\rep@title}
%\newcommand{\newreptheorem}[2]{%
%\newenvironment{rep#1}[1]{%
% \def\rep@title{#2 \ref{##1}}%
% \begin{rep@theorem}}%
% {\end{rep@theorem}}}
%\makeatother

%\newreptheorem{theorem}{Theorem}
%\newreptheorem{lemma}{Lemma}

%\newcommand{\tab}{~~~~}

\usepackage{xr}
\externaldocument{kash_613-supp}
%\usepackage{zref-xr}%, zref-user}
%%\zxrsetup{toltxlabel}
%\zexternaldocument*{kash_613-supp}

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
%\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
%\usepackage{booktabs} % commands to create good-looking tables
%\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
%\newcommand{\swap}[3][-]{#3#1#2} % just an example


% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors



\usepackage{xspace,amsmath,amsfonts,bm,bbold,appendix}
\usepackage{amssymb}
%\DeclareMathOperator*{\argmax}{arg\,max}
%\DeclareMathOperator*{\argmin}{arg\,min}
%\usepackage{comment}
\usepackage{graphicx}
%\usepackage{algorithmic}
\usepackage[ruled,linesnumbered]{algorithm2e}
\usepackage{ifthen}
\usepackage{subcaption}

% Theorem-like environments used in the paper.
% None of the declarations passes a shared-counter or "numbered within"
% optional argument, so each environment is numbered by its own counter.
\newtheorem{lemma}{Lemma}
\newtheorem{theorem}{Theorem}
\newtheorem{hyp}{Hypothesis}
\newtheorem{Observation}{Observation}
\newtheorem{corollary}{Corollary}
%\DeclareMathOperator*{\argmax}{arg\,max}

% \comment{...} swallows its argument and typesets nothing.
\newcommand{\comment}[1]{}
% Label/reference helpers. Each "...l" macro wraps \label with a fixed key
% prefix; the matching "...r" macro typesets an abbreviated name, a tie, and
% \ref with the same prefix, so \Secl{foo} pairs with \Secr{foo}, etc.
\newcommand{\Secl}[1]{\label{sec:#1}}
\newcommand{\Secr}[1]{Sec.~\ref{sec:#1}}
\newcommand{\Algl}[1]{\label{alg:#1}}
\newcommand{\Algr}[1]{Alg.~\ref{alg:#1}}
% Note: \eql/\eqr use the prefix "eqn:" whereas \Eql/\Eqr (below) use "eq:";
% the two equation families are therefore not interchangeable.
\newcommand{\eql}[1]{\label{eqn:#1}}
\newcommand{\eqr}[1]{Eqn.~\ref{eqn:#1}}
\newcommand{\Appl}[1]{\label{app:#1}}
\newcommand{\Appr}[1]{App.~\ref{app:#1}}
% Label-side macros for observations, lemmas, theorems, corollaries,
% equations and figures.
\newcommand{\Obl}[1]{\label{Ob:#1}}
\newcommand{\Leml}[1]{\label{Lem:#1}}
\newcommand{\Thl}[1]{\label{Th:#1}}
\newcommand{\Crll}[1]{\label{Cr:#1}}
\newcommand{\Eql}[1]{\label{eq:#1}}
\newcommand{\Figl}[1]{\label{fig:#1}}

% Reference-side counterparts of the label macros above.
% \Eqr is the only one that wraps the number in parentheses.
\newcommand{\Obr}[1]{Ob.~\ref{Ob:#1}}
\newcommand{\Lemr}[1]{Lem.~\ref{Lem:#1}}
\newcommand{\Thr}[1]{Th.~\ref{Th:#1}}
\newcommand{\Crlr}[1]{Cor.~\ref{Cr:#1}}
\newcommand{\Eqr}[1]{Eq.~(\ref{eq:#1})}
\newcommand{\Figr}[1]{Fig.~\ref{fig:#1}}

% Single-symbol notation macros for the model.
\newcommand{\demand}{D}
\newcommand{\mass}{M}
\newcommand{\eempty}{E}
% \strategy is the symbol (pi) used for the relocation policy in the Model section.
\newcommand{\strategy}{\pi}
% \policy expands to pi^E; presumably a policy for empty rides -- it is not
% used in this part of the file, TODO confirm its meaning.
\newcommand{\policy}{\pi^E}

% arg max operator. Declared with \DeclareMathOperator* (amsmath, loaded
% above) so that it gets proper operator spacing and so that a subscript is
% placed underneath the operator in display style.
\DeclareMathOperator*{\argmax}{arg\,max}
%\newcommand{\demand}{d\xspace}
%\newcommand{\nregion}{R}
%\newcommand{\moving}{m}
% Single-letter symbols.
\newcommand{\price}{p}
%\newcommand{\transitionmatrix}{{\mathcal T}}
\newcommand{\mctransition}{T}
%\newcommand{\transition}{\mctransition}


%\newcommand{\stam}[2]{\mbox{diff}(#1, #2)}
% \realize{x}: x with a hat accent.
\newcommand{\realize}[1]{\hat{#1}}
% Calligraphic L: the complete lattice used in the uniqueness section.
\newcommand{\lattice}{\mathcal{L}}
% Name of the auxiliary function used in the uniqueness section.
\newcommand{\aux}{\mathit{aux}}
%real demand according to the demand satisfaction constraint
%\newcommand{\realdemand}{V}
%mass distribution
%\newcommand{\massdist}{x} 
% \vecc{c}: blackboard-bold c, the vector whose entries are all c
% (digits in \mathbb are provided by the bbold package loaded above).
\newcommand{\vecc}[1]{\mathbb{#1}}
% \sigr{A} typesets "A 1" (vector of row sums of A);
% \sigc{A} typesets "1 A" (vector of column sums of A).
\newcommand{\sigr}[1]{#1 \vecc{1}}
\newcommand{\sigc}[1]{\vecc{1} #1}
% \mb{A}: upright bold A, used in the Notation section for the sum of A's entries.
\newcommand{\mb}[1]{\mathbf{#1}}
%\newcommand{\sigr}[1]{\bm{#1 1}}
%\newcommand{\sigc}[1]{\bm{1 #1}}

% \next{M} typesets "next(M)"; \best{x} typesets "best(x)".
% NOTE(review): \next is also a traditional scratch-macro name in
% plain-TeX-style code; confirm that no package loaded later overwrites it.
\newcommand{\next}[1]{\mathit{next}(#1)} % value of #1 at the next state
\newcommand{\best}[1]{\mathit{best}(#1)} % set of best replies value to #1
%\newcommand{\vecc}[1]{\mathbf{#1}}         % all #1-vector 

% \Proof starts a new, unindented paragraph with a bold "Proof:" followed by
% a non-breaking space.
\newcommand{\Proof}{\par\noindent\textbf{Proof:}~}
% \QED pushes a bowtie symbol to the right margin and adds a \medskip.
\newcommand{\QED}{\hfill $\bowtie$ \medskip}

% Program variables of the construction algorithm, set in math italic:
% \curM is the current mass distribution, \curm the mass still to be allocated.
\newcommand{\curM}{\mathit{M}}
%\newcommand{\curV}{\mathit{T}}
\newcommand{\curm}{\mathit{m}}
% \tmin typesets "Delta q": the amount of mass allocated in one loop iteration.
\newcommand{\tmin}{\mathit{\Delta q}}
% Logical "and" / "or", each padded on both sides with a tie (~).
\newcommand{\band}{~\wedge~}
\newcommand{\bor}{~\vee~}
% Boolean constants and the ergodicity predicate, set in small caps.
% \textsc is used instead of the old declaration \sc: it confines the font
% change to its argument (\sc{Erg} does not take an argument and would leak)
% and it also works in math mode, where \er is used.
\newcommand{\true}{\textsc{T}}
\newcommand{\false}{\textsc{F}}
\newcommand{\er}[1]{\textsc{Erg}(#1)}  % ergodic matrix

% \nextpi{x} typesets "next" with subscript x.
\newcommand{\nextpi}[1]{\mathit{next}_{#1}}
% \nuu{x} typesets a bold nu followed by "(x)" in upright bold.
\newcommand{\nuu}[1]{\boldsymbol{\nu}\mb{(#1)}}
% \lsq / \rsq build overlapping "([" and "])" delimiters by pulling the two
% glyphs together with a negative \hspace.
\newcommand{\lsq}{(\hspace{-2pt}[} 
\newcommand{\rsq}{]\hspace{-2.25pt})}
% \maxz{x} encloses x in \lsq ... \rsq with a small "+" subscript;
% presumably this denotes max(x, 0) -- TODO confirm against its uses.
\newcommand{\maxz}[1]{\lsq #1 \rsq_{_+}}
%\newcommand{\maxz}[1]{ [#1]_{_+}}
% Short small-caps names (presumably the methods compared in the experiments
% -- confirm against their uses). \textsc replaces the obsolete {\sc ...}
% form; \xspace restores the following inter-word space in running text.
\newcommand{\greedy}{\textsc{Gdy}\xspace}
\newcommand{\dynamic}{\textsc{Con}\xspace}
\newcommand{\lb}{\textsc{Lb}\xspace}
\newcommand{\static}{\textsc{Sta}\xspace}
\newcommand{\hmr}{\textsc{Hmr}\xspace}
\newcommand{\lka}{\textsc{Lka}\xspace}

\begin{document}

\title{Dynamic Relocation in Ridesharing via Fixpoint Construction}


\author[1]{Ian A. Kash}
\author[1]{Zhongkai Wen}
\author[1]{Lenore D. Zuck}
% Add affiliations after the authors
\affil[1]{%
    %Computer Science \\
    University of Illinois Chicago\\
    Chicago, IL, USA
}


%%
%% By default, the full list of authors will be used in the page
%% headers. Often, this list is too long, and will overlap
%% other information printed in the page headers. This command allows
%% the author to define a more concise list
%% of authors' names for this purpose.
%\renewcommand{\shortauthors}{Trovato and Tobin, et al.}

\maketitle

%%
%% The abstract is a short summary of the work to be presented in the
%% article.
\begin{abstract}
%V0
%Ridesharing platforms face frequent imbalances between the supply of drivers and demand for rides in a particular location, so effective relocation of drivers to locations with higher demand is important for efficiency.  Despite this, there are relatively few algorithms designed to handle changing demand patterns, and those that do are generally heuristic adaptations of algorithms for static demand patterns.  We provide the first algorithm with convergence guarantees for this setting.  To do so, we develop a novel constructive analysis of the fixpoints of relocation dynamics under static demands which is of independent interest.
%V1
To address spatial imbalances in the supply and demand of drivers, ridesharing platforms can make use of policies to direct driver relocation.  We study a simple model of this problem, which allows us to give a constructive characterization of the unique fixpoint of system dynamics.  Using this construction, we design a dynamic policy that provides stronger guarantees than previous work about its rate of convergence to the fixpoint.  Simulations demonstrate the benefits of our approach.
\end{abstract}



%\input{introduction}
\section{Introduction}

Ridesharing platforms such as Didi, Lyft, and Uber match passengers in need of transportation with drivers who can provide it.  As drivers provide service to passengers, they themselves travel and so may end up in a new region when seeking to next provide service. 
Unbalanced demand to and from a region leads to an excess or shortage of drivers in that region 
and calls for a policy for \emph{relocation} of drivers. 
Designing good relocation policies that direct drivers from regions where there is an excess to regions where there is a shortage
is therefore a key challenge both for providing efficient service~\citep{afeche2018ride} and minimizing environmental costs~\citep{ward2021air}.

Prior work has developed a number of models of the problem of designing relocation policies, including in the context of other decisions such as pricing.
%% LDZ "in isolation" not clear here.  Which is important since our claim here (as I read it) that we're handling optimizing a gestalt rather than a single objective. 
%%Does this read better?
Many of these models are, however,  quite complex and the results are often focused on optimizing a particular objective such as revenue or availability. 
%% explain why this is a problem -- why optimization of a particular objective leaves basic questions about convergence open. I'm not getting it from reading what you wrote and I myself cannot write it. 
%This makes quite basic questions about the convergence of the systems under a particular policy challenging to answer and in some cases unresolved.  
%% Rewrote and expanded
This leads to several related challenges.  In some analyses it is simply assumed that the system has achieved some notion of steady state or equilibrium without justification of how the system dynamics lead there~\citep{bimpikis2019spatial,besbes2021surge}.  Furthermore, this steady-state behavior may only be examined for an optimal policy for a particular objective, leaving open the behavior of the system if a policy is chosen to address another objective~\citep{bimpikis2019spatial,hosseini2021dynamic,iglesias2019bcmp,zhang2016control}.  Even for those few analyses that have addressed the convergence behavior of arbitrary policies, the complexity of the model has typically led to a non-constructive analysis~\citep{braverman2019empty}.

%% LDZ: too abrupt here.  Maybe a softer "In this paper we address the problem (that we just defined above) by ... 
In this paper we address these problems by adopting
a simple stylized model that can be thought of as a special case or limiting behavior of a number of models in the literature (see Section~\ref{sec:related}).  
In it, the area where the ridesharing platform operates is divided into $r$ regions and there is a fixed total mass of drivers available to serve passengers.  Time is discrete and at each step a fixed mass of passengers seek service in each region.  The drivers currently in that region carry passengers to their destination.  If the mass of drivers exceeds the mass of passengers in a region, the remaining drivers relocate according to a fixed policy (which may include staying where they are).  Both carrying passengers and relocating take a single time unit.  If the mass of passengers exceeds the mass of drivers in a region, the excess passengers are simply not served.

The simplicity of our model allows us to provide concise arguments that establish a number of key properties.
\begin{itemize}
    \item Each combination of policy and total mass of drivers has a unique fixpoint of the system dynamics.
    \item Starting from any initial conditions, the system dynamics converge to this unique fixpoint.
    \item The fixpoint is continuous and monotone in the mass of drivers and can be constructed via the stationary distributions of a linear number of Markov chains.
\end{itemize}
Our arguments are based on the analysis of a piecewise linear generalization of Markov chains which may be of independent interest.

As an example of the benefits of such a rigorous understanding of the behavior of the system, we analyze the problem of dynamically adjusting the policy to converge to the fixpoint as rapidly as possible from arbitrary initial conditions.  We introduce a dynamic policy that makes use of our fixpoint construction and is the first to have guarantees about its rate of convergence and the welfare loss while converging relative to welfare in the chosen fixpoint.  Simulations based on data from Didi show superior performance to prior approaches for handling stochastic demands.  Additional simulations on synthetic data show that it converges substantially faster than previous heuristic approaches.  In our simple model these previous approaches target maximizing efficiency and our approach matches their performance while being more flexible in its ability to target other metrics such as availability.

\subsection{Related Work}
\label{sec:related}

Our model is part of a growing literature that analyzes the stationary behavior of ridesharing systems either in a direct formulation as an optimization problem~\citep{bimpikis2019spatial,pavone2012robotic} or as the fluid limit of a queueing model~\citep{braverman2019empty,hosseini2021dynamic,iglesias2019bcmp,banerjee2017pricing,zhang2016control,waserhole2012vehicle}.  Indeed, our model is a special case of a number of these.  There are also models of service networks which lack the crucial aspect of ridesharing that providing service changes which customers can be served in the future~\citep{caldentey2009fcfs,adan2012exact,gurvich2014dynamic}.

Closest to our theoretical results, \citet{braverman2019empty} analyze the dynamics of a queueing system with a fluid limit, which generalizes our model.  As we do, they show convergence to a unique fixpoint.  Their analysis is non-constructive and substantially more complex.  They examine  dynamic relocation but provide only simple heuristics.  Our analysis is constructive and simpler, and we provide theoretical guarantees for dynamic relocation. 

\citet{hosseini2021dynamic} recently examined the use of dynamic policies to address stochastic deviations from the fixpoint of system dynamics due to a finite population of drivers and passengers.  While their analysis is quite different from ours, they exploit some of the same underlying properties of the dynamics and we use an adaptation of their heuristic as a comparison in our experiments.  As we demonstrate, in our experiments we obtain faster convergence and can target a wider range of objectives.

While our focus is on the design of relocation policies, there is also a literature on the implications of implementing them with self-interested drivers through pricing or other mechanisms.  Closest to our work are those on spatial pricing~\citep{afeche2018ride,besbes2021surge,lu2018surge,bimpikis2019spatial}, but other work examines temporal policies such as charging ``surge'' prices at times of peak demand~\citep{hall2015effects,banerjee2015pricing,cachon2017role,chen2015dynamic,garg2021driver,hall2017labor} and combining both spatial and temporal pricing~\citep{buchholz2015spatial,guda2017strategic,ma2019spatio}.

While we treat all drivers and passengers in a region as interchangeable, there are also finer-grained models which focus on decisions about which specific driver to match to each passenger~\citep{hu2022dynamic,castillo2017surge,ozkan2020dynamic,biswas2017profit}.
%##############################################################

%\input{prelim}
\section{Preliminaries}\Secl{prelim}

\subsection{Notation}
Let $r$ denote the number of regions. In the sequel, all the vectors are $r$-dimensional and all the matrices are $r \times r$ dimensional.
We sometimes abuse notation and denote by $A[i,j]$ the term $(A[i])[j]$.

For two vectors $A$ and $B$, we say that $A \ge B$ if $\max(A,B)=A$, where the $\max$ (here and elsewhere in this document) is taken point-wise.
Thus, for every $i$, $A[i] \ge B[i]$.
Similarly, for vectors $A$ and $B$, $A\le B$ if $\min(A,B)=A$. 
Similarly, if $A$ and $B$ are matrices, we say that $A \ge B$ (resp.~$A\le B$) if for every $i$ and $j$, $A[i,j] \ge B[i,j]$ (resp.~$A[i,j] \le B[i,j]$). 

We denote by $\vecc{c}$ the vector whose entries are all $c$. 
%We usually use this notation for
In particular we use
$\vecc{0}$---the all 0 vector, and $\vecc{1}$---the all 1 vector. 

%$c\cdot V$ the product of $c$ with the vector $A$. 
%Similarly, we use $\max(c,A)$ to denote the vector obtained by taking point-wise maximum between $A$'s entries and $c$.

We use boldface to denote the sum of a vector or a matrix. 
Thus, $\mb{A}$ is the sum of $A$'s entries. 

For a matrix $A$, $\sigc{A}$ is the vector whose $i^{th}$ entry is the sum of the $i^{th}$ column of $A$.
Similarly, $\sigr{A}$, is the vector whose $i^{th}$ entry is the sum of the $i^{th}$ row of $A$. 

%ian01*: I think we need to be a bit clearer here.  Is the point that we can normalize things to get the unique corresponding vector?  Rewrote accordingly.
%For a non-negative real $x$, let a \emph{vector induced by $x$} be any ($r$-dimensional) vector of non-negative reals whose sum is $x$.
%We call a  vector $A$ of non-negative entries whose sum is 1 a \emph{probability vector}.
%Every vector induced by $x$ defines a probability vector $A$. Conversely, every probability vector $A$ defines a vector induced by $x$,
%which we call the \emph{the vector induced by $x$ given $A$}. 
%ldz01*: what about 0?  shouldn't $x$ be positive as opposed to non-negative?  I changed by then, is a * comment
 For a positive real $x$, let a \emph{vector induced by $x$} be any ($r$-dimensional) vector of non-negative reals whose sum is $x$.
We call a vector $A$ induced by $1$ a \emph{probability vector}.
Every vector induced by a positive $x$ uniquely defines a probability vector $A$ by normalization. Conversely, every probability vector $A$ defines a vector induced by $x$, namely $xA$, which we call the \emph{vector induced by $x$ given $A$}.

A \emph{right stochastic matrix}, or just a \emph{stochastic matrix}, is a matrix all of whose rows are probability vectors.
Such a matrix $A$ corresponds to a Markov chain and is \emph{ergodic} if for some power $k$, $A^k$ is positive.
An ergodic matrix has a unique \emph{stationary distribution}.
We use the predicate $\er{A}$ to denote that a stochastic matrix $A$ is ergodic.
If $\er{A}$, we denote by $\sigma(A)$ its unique stationary distribution, thus $\sigma(A) = \sigma(A)A$. 

%IAK: I agree, no longer needed.
%\emph{LDZ: needed anywhere?}
%Let  $A$ be a stochastic matrix and  $I$ be a set of indices such that all 0 entries in $A$ belong to some column $i\in I$. 
%Then $A_{-I}$ is the matrix obtained from $A$ without the entries indexed  by $I$.
%Thus $A_{-I}$ is a $(r-|I|) \times  (r-|I|)$ ergodic matrix. 
%Define $\sigma(A)_{-I}$ as   $\sigma(A_{-I})$ with the $I$-entries are added as 0's. 
%For example, if $r=5$, $I=\{1, 3\}$, and $\sigma(A_{-I})= [\sigma[2], \sigma[4],\sigma[5]]$,
%then $\sigma_{-I} = [0,\sigma[2], 0, \sigma[4],\sigma[5]]$. 

\subsection{Model}

%Define a system as $\langle W, \pi\rangle$. 
%this defines next with two components, the W-dependent and the pi-dependent. 
%suggestion: nextW and nextPi and next is their sum. 
%Then in sections x to y we fix a mass of drivers q.  a supply vector is \ldots that sums up to q.  

Our model of a ridesharing system has four key parts:
\begin{enumerate}
\item A demand matrix
%$D$ such that $D[i,j]$ 
$W$ such that $W[i,j]$
denotes the mass of passengers wishing to travel from $i$ to $j$.
For technical convenience we assume that for every $i$ and $j$, $W[i,j] > 0$.  $W$ induces a right stochastic matrix which we denote $V$ where $V[i,j]$ is the fraction of passengers in region $i$ who wish to travel to $j$;
\item A policy $\strategy$---a probability vector where $\strategy[j]$ denotes the fraction of drivers who have no passengers that relocate to region $j$.  We denote by $Z_{\pi}$ the set of regions $i$ for which $\strategy[i]=0$. 
For notational convenience we sometimes refer to a stochastic matrix $\Pi$ where each row is $\strategy$. 
\item A total mass of drivers in the system $q \geq 0$.
%\item A price $p \in (0,1)$ that is the fixed price for each ride; 
\item At each timestep the current state of the system is represented by a mass distribution $M$ where $M[i]$ denotes the mass of drivers at region $i$.  Thus we require $M \geq \vecc{0}$ and $\mb{M}=q$.
\end{enumerate}

%Let $W$ (for Willingness) be the matrix $(1-p)\cdot D$.  
%Each $W[i,j]$ is the number of passengers \emph{willing} to take a ride whose price is $p$ from $i$ to $j$.
%Let $V$ denote the right stochastic matrix induced by $W$.
%We fix $\strategy$, $p$, $D$ (and therefore $W$ and $V$)  for the duration of this discussion. 


From a region $i$, the first up to $\sigr{W}[i]$ drivers are distributed \emph{full}, that is with passengers, to every region $j$ proportional to $V[i,j]$.
The other drivers, if any,   drive \emph{empty}, that is passenger-less, to a destination according to $\pi$. 
A region $i$ for which $M[i]\ge \sigr{W}[i]$ is called \emph{saturated}; otherwise it is called \emph{unsaturated}.
Let $U_M$ be the set of unsaturated regions given $M$.
Let $\mathit{out}(M)[i]$ be the vector describing the outflow from region $i$ given $M$.
For every region $i$, 
%$$\mathit{out}_{W,\pi}(M)[i]=\left\{
	%\begin{array}{ll}
		%M[i] V[i] & i \in U_M\\
		%\sigr{W}[i] V[i] + (M[i]-\sigr{W}[i]) %\strategy & \mbox{otherwise}
%	\end{array} \right.
%$$	
%\vspace{-0.5mm}
$$\mathit{out}_{W,\pi}(M)[i]=
\begin{cases}
	M[i] V[i]&\hspace{-5mm} \text{ $i \in U_M$}\\
	\sigr{W}[i] V[i] + (M[i]-\sigr{W}[i]) \strategy &\text{o.w.}%\mbox{otherwise}
	\end{cases}
$$
%\vspace{-0.5mm}
The first line, as well as the first term in the second line, refer to full rides from $i$, while the second term in the second line refers to empty rides from $i$. 

We assume that time is discrete and all rides and relocations take unit time.  Denote by $\next{M}$  the vector of the drivers available in each location in the next step. 
Then for every region $i$, 
%\vspace{-0.5mm}
$$ \next{M}_{W,\pi}[i] = \sum_j \mathit{out}_{W,\pi}(M)[j,i] 
$$
For brevity, when it does not create ambiguity we may omit the $W$ or $\pi$ from the subscript.
A fact that will be used repeatedly is that $\next{M}$ (and its component parts) are monotone and continuous in $M$:

\begin{lemma}\Leml{phi_monotone}
The functions $\mathit{out}(M)$ and $\next{M}$ are continuous and increasing   in $M$.
\end{lemma}
\Proof Consider a region $i$ and the function $\mathit{out}(M)[i]$, which is either
a product of $M[i]$---a scalar which is continuous in $M$---and a constant vector,
or a sum of two such vectors. Hence $\mathit{out}(M)[i]$ is continuous. 
In either case, it is also increasing in $M$. 
Since $\next{M}$ is a summation over $\mathit{out}(M)$'s columns, the claim follows. 
\QED

The stylized model we have described is a special case of a number of models in the literature~\citep[e.g.,][]{bimpikis2019spatial,braverman2019empty,hosseini2021dynamic}.  We have made it as simple as possible to enable a clear, concise analysis.  We discuss relaxing assumptions in \Secr{discussion}.
%##############################################################

%\input{setup} 

%\input{construct}
\section{Constructing a Fixpoint
%for $\mathit{next}$
}\Secl{construct}
Fix a demand matrix $W$ and a policy $\pi$.
We present an algorithm (\Algr{construct}) that, given a quantity $q \geq 0$, constructs a mass distribution $M$ such that $\bm{M}=q$ and $M=\next{M}$. 
Here,  $\next{M}$ is analogous to the action of a Markov chain and such a fixed point would correspond to a stationary distribution.  However, in general the action of $\next{M}$ is not a Markov chain because $\mathit{out}_{W,\pi}$ is piecewise linear rather than linear.
We first introduce some notation and claims that are used in the algorithm and its proof of correctness.

\subsection{Building Blocks}
A mass distribution  $M$ defines the \emph{marginal transition matrix} $T_M$ where for every region $i$, 
$$ T_M[i] ~=~\left\{\begin{array}{ll}
			  V[i] & i \in U_M \\
			  \pi	     & \mbox{otherwise}
			\end{array}\right.
$$

Because $W[i,j] > 0$ for all $i$ and $j$, $T_M$ is ergodic for every $M$ for which $U_M$ includes some non-$Z_\strategy$ regions. 
Given $M$ such that $\er{T_M}$ and  $\sigma = \sigma(T_M)$, define
$$ q(M) = \min_{i\in U_M} (({\sigr{W}[i] - M[i]})/{\sigma[i]})$$
That is, $q(M)$ is the maximal mass that can be added to $M$, according to $\sigma$, so as not to cause any region that was unsaturated to become oversaturated.
%
The following lemma establishes that if $M=\next{M}$ and there are some non-$Z_{\strategy}$ unsaturated regions, then adding any quantity $q\le q(M)$ to $M$, proportionally to $\sigma(T_M)$, will  maintain the fixpoint. % property. 

\begin{lemma}\Leml{inc_q}
Assume $M=\next{M}$ and  $U_M\cap \overline{Z_{\strategy}} \ne \emptyset$, and let $\sigma=\sigma(T_M)$.
Then for every $q'$, $0 \leq q' \leq q(M)$,  $M + q'\sigma$ is a fixpoint mass distribution of $\mathit{next}$.
\end{lemma}
\Proof
Consider some $q'$, $0 \leq q' \leq q(M)$ and $i\in U_M\cap \overline{Z_{\strategy}}$.
The additional outflow from region $i$ in $M' = M + q' \sigma$ relative to $M$ is $q' \sigma[i]$. 
Every  region $j$ contributes $q' \sigma[j] T_M[j,i]$ into $i$. 
Since $\sigma=\sigma(T_M)$, $\sum_j q' \sigma[j] T_M[j,i] =  q'\sigma[i]$.
\QED

The following lemma establishes that if $M=\next{M}$ with all unsaturated regions in  $Z_{\strategy}$,  then adding any quantity $q\ge 0$ to $M$, according to $\pi$, will  maintain the fixpoint. %property. 

\begin{lemma}\Leml{delJ}
Assume $M = \next{M}$  and $U_M \subseteq Z_{\strategy}$. 
Then for every $q\ge 0$, $M + q\pi$ is a fixpoint mass distribution.
\end{lemma}

The proof, along with subsequent omitted proofs, can be found in \Appr{proofs}.

\iffalse{
\Proof 
If suffices to show that for $M' = M + q\pi$, $M'=\next{M'}$.  A region $i\in U_M$ is in $Z_\pi$ by
assumption, hence $M'[i]=M[i]$ and,
since  $i\in Z_{\strategy}$, $\mathit{out}(M')[j,i] = \mathit{out}(M)[j,i]$ for every $j\not\in U_M$.
Thus $\next{M}[i] = \next{M'}[i]$. 

For a region $i$ not in $U_M$, $M'=M + q\pi[i]$.  While non-$U_M$ regions contribute no additional flow into $i$, 
each other region $j$ contributes  $q\pi[j] \pi[i]$ additional flow into $i$. 
Since $\sum_{j\not\in U_M} \pi[j]\pi[i] = \pi[i]$, 
hence the additional flow into $i$ is $q\sigma[i]$.  \QED
}\fi


\subsection{The Construction}
Let $q \geq 0$. We now describe a construction that allocates $q$ drivers into a mass distribution $M$ such that $M$ is a fixpoint. 
The function mapping $q$ into the fixpoint is piecewise linear and is accomplished in at most $r+1$ phases.  
In all but possibly the last phase, a portion of the remaining $q$-allocation is distributed among the regions so as to satisfy the fixpoint condition while ensuring that no unsaturated region becomes oversaturated, until either $q$ is exhausted or all non-$Z_\pi$ regions are saturated.
If all non-$Z_\pi$ regions are saturated and $q$ is not exhausted, the remaining mass is distributed according to $\pi$. 
%
The construction algorithm is in \Algr{construct}.

\begin{algorithm}[htb]
\SetAlgoLined
\DontPrintSemicolon
\SetKwInOut{Input}{input}
\SetKwInOut{Output}{output}
\SetKwRepeat{Repeat}{do}{until}
\KwIn{A willing demand $W$, a policy $\pi$, and a total mass of drivers $q$}
\KwOut{A mass distribution $M$ such that $\mathbf{M}=q$ and $\next{M}=M$}
$\curM:$ a vector, {\bf init} 0 \tcc{current mass distribution} 
$\curm:$ {\bf init} $q$ \tcc{current mass}
$i:$ {\bf init} $r$ \tcc{ghost variable for correctness proof}

\While{$(\curm > 0)$}{ 
 	\label{while_b}
  	\eIf{$U_M\subseteq Z_\strategy$}{ \label{case_pi_b}
  	 		$\sigma\leftarrow\pi$\; 
	    		$\tmin \leftarrow \curm$\; 
	\label{case_pi_e}}{
    		$\sigma\leftarrow\sigma(T_M)$ \; \label{fix}
	    $\tmin \leftarrow \min (q(M), \curm)$\; \label{dist}
	}
%  \lIf{$(U_M\subseteq Z_\strategy)$}{ 
 % 	 $\tmin \leftarrow \curm$ {\bf else} $\tmin \leftarrow \min (q(M), \curm)$ \label{allocate}} 
 $\curM \leftarrow \curM + \tmin\cdot \sigma$\;
 \label{Mupdate}
   $\curm \leftarrow \curm - \tmin$\;
   \label{update_q}
   $i \leftarrow i -1$\;
} \label{while_e}
\Return {$\curM$}
\caption{Construction of the Fixpoint}\Algl{construct}
\end{algorithm}



We now show that:
\begin{theorem}\Thl{alg_correct} 
\Algr{construct} terminates, and upon termination $\curM = \next{\curM}$
and $\bm{\curM} = q$.  
\end{theorem}
%#######################################################

%\input{unique}
\section{Uniqueness of fixpoint for $\mathit{next}$}\Secl{unique}

%We establish that for every mass driver vector $M$, $\next{M}$ has a unique fixpoint. 
%To this end we assume a positive real $q$, which we fix for this section, and show
%%ian01:
%%for all vectors $M$ of non-negative reals for which $\bm{M}=q$,
%the existence of  a unique vector, $\widehat{M}$, of non-negative reals whose sum is $q$ such that
%$\next{\widehat{M}}=\widehat{M}$.

%We first introduce $S_q$, a vector space that includes all vectors of non-negative reals whose sum is $q$, 
%and show it to be compact and convex. 
%It then follows, from Brouwer's fixpoint theorem, that $\mathit{next}$ has \emph{some} fixpoints in $S_q$.
%The vector space $S_q$ is then extended to a complete lattice $\lattice$ on which one can 
%%with the aid of an auxiliary monotonically increasing function ($g$)  that has the same fixpoints as $\mathit{next}$, 
%apply there Knaster-Tarski theorem to show that  the fixpoint of $\mathit{next}$ in $S_q$ is unique. 

%Let
%%ian01*: Made it R^{\ge 0} since we weren't enforcing the non-negativity otherwise.  I also took out the infinities here; we don't need them for closure because $q$ is finite, but will later when we define \cal R.
%%$$ S_q ~=~ \{ M \in (\mathbb{R}^{\ge 0}\cup\{\infty\})^r : \bm{M} = q\}$$
%$$ S_q ~=~ \{ M \in (\mathbb{R}^{\ge 0})^r : \bm{M} = q\}$$
%%Including $\infty$ in the vectors renders $S_q$ closed, and since $S_q$ is bounded --- it is a $(r-1)$ dimensional simplex in the
%The vector space $S_q$ is close and bounded in an $r$-simplex in the
%non-negative portion of $\mathbb{R}^r$, hence it is compact. 
%Moreover, for every 
% $\beta \in [0,1]$ and  $M_0, M_1 \in S_q$, 
%$$\sum_i (\beta M_0[i] + (1-\beta) M_1[i]) ~=~\beta q+ (1 - \beta) q $$
%hence $S_q$ is convex. 

%%ian01*: Brouwer requires continuity, not monotonicity
%%Since $\mathit{next}$ is monotone (\Crlr{monotone}) 
Fix a demand matrix $W$ and a policy $\pi$.
For every $q \in \mathbb{R}^+$ we have shown how to construct a fixpoint of $\mathit{next}$ with total mass $q$.  We now show this fixpoint is unique.
Let 
$$ S_q ~=~ \{ M \in (\mathbb{R}_{\ge 0})^r : \bm{M} = q\}$$
denote the set of mass distributions with total mass $q$.  In the proof, rather than confining our analysis to $S_q$ we extend it to a complete lattice $\lattice$ on which one can, with the aid of an auxiliary monotonically increasing function ($\aux$)  that has the same fixpoints as $\mathit{next}$, 
apply the Knaster-Tarski theorem to show that  the fixpoint of $\mathit{next}$ in $S_q$ is unique. 

\begin{theorem}\Thl{unique}
Let $W$ and $\pi$ be given. For all $q \geq 0$, the function $\mathit{next}$ has a unique fixpoint in $S_q$.
\end{theorem} 

The proof of the theorem relies on the following technical lemma.  
Consider two mass distributions $M_0$ and $M_1$ such that $M_0 \lneq M_1$ and let $i$ and $j$ be regions such that
$\next{M_0}[i] = \next{M_1}[i]$ and $M_0[j] < M_1[j]$. 
That is, the increase in mass from $M_0$ to $M_1$ adds drivers to $j$ but  does not result in additional inflow to $i$.
The following lemma shows that this is equivalent to having $i$ assigned no rides by $\strategy$ and $j$ being saturated in $M_0$.

\begin{lemma}\Leml{we_need}
Let $M_0$ and $M_1$ be mass distribution vectors such that $M_0 \lneq M_1$.  Let $J \subseteq U_{M_0}$ be the set of unsaturated regions $j$ for which $M_0[j] < M_1[j]$.  Then
for all regions $i$,
$$ 
\next{M_0}[i] =\next{M_1}[i] \quad\mbox{iff}\quad
i \in Z_{\strategy}~\mbox{and}~ J= \emptyset
$$
\end{lemma}
%##########################################################

%\input{linear_prog}
\section{Optimal Mass Allocation }\Secl{optimalLP}

So far, we have shown how given a fixed willingness $W$ (and thus also $V$), policy $\pi$, and mass $q$ we can compute the unique fixpoint. However, a ridesharing platform will typically have at least some control over $\pi$.  A natural question is how $\pi$ should be chosen.  We examine how this can be done to maximize the number of full rides.
We give a linear programming approach to calculating such an optimal $\pi$.  This type of approach has been used in a number of similar models~\citep{braverman2019empty,hosseini2021dynamic,bimpikis2019spatial}, but we provide a complete treatment as we use it in our experiments.

Recall the definition of \emph{out} from \Secr{prelim}.  There we split the computation of the number of rides outgoing from a region according to whether the region is saturated.   If the region is unsaturated, then obviously all outgoing rides are full.  Else, some outgoing rides are empty, and distributed according to $\pi$.  Let $F[i,j]$ denote the full outgoing rides from $i$ to $j$ and $E[i,j]$ the empty outgoing rides.  That is, for a mass distribution $M$, 
$$F(M)[i]~=~\left\{ 
	\begin{array}{ll}
		M[i]V[i] & M[i] < \sigr{W}[i] \\
		W[i]   & \mbox{otherwise}
	\end{array} \right.
$$		
and 
$$E(M)[i]~=~\left\{ 
	\begin{array}{ll}
		0 & M[i] < \sigr{W}[i] \\
		(M[i]-\sigr{W}[i])\strategy & \mbox{otherwise}
	\end{array} \right.
$$		

Rather than directly calculating a policy $\pi$ that at the fixpoint maximizes $\bm{F}$, we instead provide a linear program in \Figr{feasible} that defines when a solution $\langle F, E \rangle$ is feasible in that it results from the fixed point of some policy $\pi$ for supply $q$.
The first constraint restricts to non-negative values. The second, \emph{{\bf S}upply-constraint},  requires that the total traffic is $q$. The third, \emph{{\bf F}low-constraint}, requires that at each region the in- and out- flows are equal (thus the solution is a fixpoint). The fourth, \emph{{\bf D}emand-constraint}, requires that the full outgoing traffic at each region is no more than the willing demand at the region. Finally, the fifth, \emph{{\bf P}roportion-constraint}, requires that full outgoing traffic at each region is allocated according to the willingness entry for that region.  
%\begin{figure}[htb]
%\fbox{
%\begin{tabular}{l l l l}
% & $F[i,j], E[i,j] \geq 0$ & for every $i$ and $j$ \\
% & $\bm{(F+E)} = q$ & & S-constraint \\
% & $\bm{(F+E)}[i,]=\bm{(F+E)}[,i]$ & for every $i$ & F-constraint \\
% &  $\bm{F}[i,] \le \bm{W}[i,]$ & for every $i$ & D-constraint\\
% & $F[i,j] = F[i,k] \cdot V_W[i,j]/V_W[i,k] $ & for every $i$, $j$, and $k$ & P-constraint 
%\end{tabular}}\caption{Definition of feasibility of $\langle F, E\rangle$ for supply $q$ \Figl{feasible}}\end{figure}
\iffalse
\begin{figure}[htb]
\fbox{
\begin{tabular}{l l l}
 & $F[i,j], E[i,j] \geq 0 \quad \forall i,j$  \\
 & $\bm{(F+E)} = q$ & S-constraint \\
 & $\sigr{(F+E)}[i]=\sigc{(F+E)}[i] \quad \forall i$ & F-constraint \\
 &  $\sigr{F}[i] \le \sigr{W}[i]\quad \forall i$  & D-constraint\\
 & $F[i,j] = F[i,k] \cdot V_W[i,j]/V_W[i,k] \quad \forall i,j,k$  & P-constraint 
\end{tabular}}\caption{Definition of feasibility of $\langle F, E\rangle$ for supply $q$ \Figl{feasible}}\end{figure}
\fi
%\hspace{-3.5mm} to put before constraint
\begin{figure}[htb]
\fbox{
\noindent
\begin{tabular}{@{}l l@{}}
  $F[i,j], E[i,j] \geq 0 \; \forall i,j$  \\
 $\bm{(F+E)} = q$ & S-constraint \\
  $\sigr{(F+E)}[i]=\sigc{(F+E)}[i] \; \forall i$ &  F-constraint \\
 $\sigr{F}[i] \le \sigr{W}[i]\; \forall i$  &  D-constraint\\
  $F[i,j] = F[i,k] \cdot V_W[i,j]/V_W[i,k] \; \forall i,j,k$  & P-constraint 
\end{tabular}
}\caption{Definition of feasibility of $\langle F, E\rangle$ for supply $q$ \Figl{feasible}}\end{figure}

Below are some properties of feasibility as defined here.
 
\begin{Observation}\Obl{feas1}
Let $W$ and $\pi$ be given.
Given any $q \ge 0$, there is a feasible solution for $q$. 
\end{Observation}

\iffalse{
 \Proof
 Given $q \ge 0$, consider an all-zero $F$ and an $E$ that is all zero but for diagonal
whose elements sum to $q$. Hence  $\langle F, E\rangle$ is feasible for $q$.  
Moreover, if $q>0$ there are infinitely many feasible $\langle F, E\rangle$'s for $q$. 
 \QED
}\fi

\begin{Observation}\Obl{feas2}
Let $W$ and $\pi$ be given and
let $\langle F, E\rangle$ be feasible for $q$. Then the following all hold:
\begin{enumerate}
\item For every $c$,  $0\leq c \leq 1$, $\langle cF, cE\rangle$ is feasible for $cq$;
\item If $\langle F', E'\rangle$ is feasible for $q'$ and $(F+F')\leq W$, then
$\langle F+F',E+E'\rangle$ is feasible for $q + q'$;
\item (Every non-trivial $x>0$ carrying empty cycle can be removed:)
\begin{enumerate}
\item If for some region $i$, $E[i,i] > x > 0$, then if $E'$ is just like $E$
only that $E'[i,i]=E[i,i] - x$, then $\langle F, E'\rangle$ is feasible for $q-x$; 
\item If for some regions $i$ and $j$, $i\ne j$, and some $x>0$, $E[i,j],E[j,i]\ge x$, then 
if  $E'$ is just like $E$ only that $E'[i,j]=E[i,j]-x$ and $E'[j,i]=E[j,i]-x$, then 
$\langle F, E'\rangle$ is feasible for $q-2x$.
\end{enumerate}
\end{enumerate}
\end{Observation}
%\Proof
%(1)--(3) follow directly from feasibility. 
%\QED

The requirements of feasibility are linear, thus the problem OA, in \Figr{OA}, is a linear programming problem.
\begin{figure}[htb]
\centering
\fbox{ \begin{tabular}{c}
Maximize $\bm{F}$ such that\\
\qquad $\langle F,E\rangle$ is a feasible solution for supply $q$ \end{tabular}
}\caption{The Optimal Allocation (OA) problem \Figl{OA}}\end{figure}

The only requirement of a fixpoint not directly enforced by feasibility is that drivers do not leave empty if there are passengers waiting.
The following lemma shows that, at the optimal solution, 
every region has a greedy strategy: it only sends empty cars after all demand is satisfied.

\begin{lemma}\Leml{property1}
Let $q \ge 0$, and assume $\langle F_0,E_0\rangle$ is feasible for $q$. 
If there exists some $i_0$ and $j_0$ such that
$E_0[i_0,j_0]>0$ and $F_0[i_0,j_0] < W[i_0,j_0]$, then 
$\langle F_0,E_0\rangle$ is not an optimal solution of OA. 
\end{lemma}
%#############################################################

%\input{dynamic_theory}
\section{Dynamic Relocation via Fixed Point Construction}

In this section we introduce our formulation of the problem of computing a dynamic relocation policy, introduce a particular policy based on our fixed point construction in \Algr{construct}, and show it has attractive theoretical properties in terms of convergence rate and welfare loss while converging.

\subsection{The Dynamic Relocation Problem}

Given $W$ and $\pi$ and some $q>0$, let  $M^*$ such that $\mb{M^*}=q$ be the unique fixed point of $\mathit{next}$ whose existence is guaranteed by \Thr{unique}.
In analogy with a Markov chain, $M^*$ serves as a stationary distribution.  As part of our analysis in this section (\Crlr{limit}), we will show that it also serves as a limit distribution.  That is, starting from any mass distribution $M_0$ such that $\mathbf{M_0} = \mathbf{M^*}$, $\lim_{t\rightarrow\infty} \mathit{next}^t(M_0) = M^*$.  However, obtaining $M^*$ by repeated application of $\next{}$ may take a long time, which has been observed to be an important problem for applying relocation policies in practice~\citep{braverman2019empty}.

The problem of \emph{dynamic relocation} is to find a sequence of policies that accelerate convergence.  %For a policy $\pi'$, denote by $\next_{\pi'}$ the $\next{}$ function where policy $\pi'$ is applied. %
Given an initial mass distribution $M_0$, a dynamic relocation policy computes a sequence of policies $\{\pi_t\}_{t=0}$ such that the sequence $ \{M_t\}_{t=0}$, where each $M_{t+1}=\nextpi{\pi_t}(M_t)$, converges to $M^*$ faster than the sequence obtained when $\mathit{next}_\pi$ is applied iteratively.
%
Unless no region is saturated, mass distributions $M$ and $M'=\mathit{next}_{\pi'}(M)$ uniquely define $\pi' = (M' - \sigc{F(M)}) / \mathbf{E(M)}$ (see \Secr{optimalLP}).
%
Hence the problem of dynamic relocation can be stated as identifying a sequence $ \{M_t\}_{t=0}$ as above that satisfies:
\begin{itemize}
    \item[1.] mass conservation: for every step $t$, $\mb{M_t} = \mb{M^*}$
    \item[2.] relocation constraint: for every step $t \ge 0$, $M_{t+1} \ge \sigc{(F(M_{t}))}$
\end{itemize}
%It will also be useful to describe what would happen if we let drivers follow the policy $\pi_t = \pi$.  To that end, let $R_M[i] = \mathbf{E_M}[,i] = \sum_j \mathbf{E_M}[j,] \pi[i] = (\mathbf{M} - \mathbf{I_M})\pi[i]$ denote flow of ''relocating'' drivers into a region $i$ when policy $\pi$ is followed.

\subsection{Our Dynamic Relocation Policy}

We now introduce our dynamic policy based on our fixpoint construction.  Before doing so, we introduce some additional notation.
\begin{itemize}
     \item Let $\nu(q)$ be the unique fixpoint of $\nextpi{\pi}$ with mass $q$.  That is, $\nextpi{\pi}(\nu(q)) = \nu(q)$ and $\nuu{q}=q$.
      
      \item Let $\nu^{-1}$ be $\nu$'s inverse extended to all mass distributions, $\nu^{-1}(M) = \max \{ q' : \nu(q') \le M\}$; that is, it captures the mass of the greatest fixpoint vector that is no larger than $M$.
\end{itemize}

We next  define our dynamic relocation policy inductively.
For a vector $A$, define $\maxz{A} = \max(A,\vecc{0})$, where as usual $\vecc{0}$ is the all zeroes vector and the maximum is taken coordinate-wise.
Given $M_t$, we define $q_{t+1}$ to be  the maximum solution to 
$$\sum_{i} \maxz{\nu(q_{t+1})-\sigc{F(M_t)}}[i] = \mb{E(M_t)}$$
%
%To see that this is well defined, recall that
%$\next{M}$ is monotone and continuous in $M$ by \Lemr{phi_monotone} and that
\Thr{alg_correct} implies that $\nu(q)$ is monotone increasing and continuous in $q$. Thus,
$\maxz{(\nu(q)-\sigc{F(M_t)})}$ is also monotone non-decreasing  and continuous in $q$.  If $ \mb{E(M_t)} >0$, $q_{t+1}$ is the unique solution, and if 
$\mb{E(M_t)} = 0$, the set of solutions $\{ q' \mid \nu(q') \le \sigc{F(M_t)} \}$ has as its maximum $q_{t+1} = \nu^{-1}(\sigc{F(M_t)})$, hence $q_{t+1}$ is well defined.

We define $M_{t+1} = \max(\sigc{F(M_t)}, \nu(q_{t+1}))$.
To verify that this is a dynamic policy, note that the relocation constraint, 
$M_{t+1} \ge \sigc{(F(M_{t}))}$, is satisfied by construction.  As for the mass conservation constraint, 
$\mb{M_{t+1}}= \sum_i \bigl(\sigc{F(M_t)}[i] + \maxz{\nu(q_{t+1})- \sigc{F(M_t)}}[i]\bigr) = \mathbf{F(M_t)} + \mb{E(M_t)} = \mb{M^*}$.
 
\subsection{Convergence properties}\Secl{convergence}

We now analyze the convergence properties of our dynamic relocation policy.  We show that it  converges, at least linearly, to $M^*$ and that a %simpler version of the
same argument shows that $M^*$ acts as a limit distribution.  To obtain a stronger guarantee, we relate the progress made by the dynamic policy to $\mb{E(M)}$, the mass of drivers we actually control.  The resulting guarantee also provides a bound on the mass of extra empty rides relative to those inherent in $M^*$.

Our first result is that the $q_t$ are strictly increasing at a rate that implies (at least linear) convergence.  Let $\Delta(t) = \mathbf{M^*} - q_t$. Since $\nu(\mathbf{M^*}) = M^*$, $\Delta(t)$ can serve as a measure of the distance between the fixed point we have succeeded in ``constructing'' so far and our goal.  

\begin{lemma}\Leml{psi>0}
$\exists c>0$ s.t.
 $q_{t+1} - q_t \ge c \Delta(t)$.
\end{lemma}


\iffalse{
\Proof
Let $c = \min_{i,j}(V[i,j])$. 
If $\mb{E(M_t)} > 0$, then $\maxz{\nu(q) - \sigc{F(M_t)}}$ is monotone increasing in $\nu(q)$ on an open interval including $q_{t+1}$.
In \Appr{proofs} we show that 
%$$\sum_i\maxz{\nu(q_t+c\Delta(t))-\sigc{F(M_t)}}[i]
%\leq \sum_i\maxz{[\nu(q_{t+1}) - \sigc{F(M_t)}]}[i]$$
\begin{multline*}
\sum_i\maxz{\nu(q_t+c\Delta(t))-\sigc{F(M_t)}}[i]\\
\leq \sum_i\maxz{[\nu(q_{t+1}) - \sigc{F(M_t)}]}[i]
\end{multline*}

Monotonicity then implies that 
$\nu(q_{t+1}) \geq \nu(q_t+ c \Delta(t)$, and therefore
$q_{t+1} - q_t > c \Delta(t)$.

If $\mb{E(M_t)} = 0$, there is no relocation and we cannot apply the previous argument because we only know that $\maxz{\nu(q) - \sigc{F(M_t)})}$ is monotone non-decreasing.  In \Appr{proofs} we show that then  $M_{t+1}[i] - \nu(q_t)[i] \geq  c\Delta(t)$.
%
Thus $M_{t+1} = \nu(q_t) + (M_{t+1} - \nu(q_t)) \geq \nu(q_t) + c\Delta(t)\vecc{1} \geq \nu(q_t + c\Delta(t))$ and $q_{t+1} \geq q_t + c\Delta(t)$. 
\QED
}\fi

\Lemr{psi>0} shows that we are guaranteed to make progress even if we do not control the relocation of any drivers, and is analogous to results about the convergence of Markov chains toward their stationary distributions.  Because it does not rely on relocation, it is quite weak; a simpler version of its proof shows that simply statically using $\pi$ satisfies essentially the same bound.  This also shows that $M^*$ acts as a limit distribution of the dynamics.

\begin{corollary}\Crll{limit}
For all mass distributions $M_0$ such that $\mathbf{M_0} = \mathbf{M^*}$, $\lim_{t\rightarrow\infty} \mathit{next}^t_\pi(M_0) = M^*$.
\end{corollary}

%The proof is in \Appr{proofs}.


To get an (often) stronger bound, recall that the essence of our dynamic relocation policy is that it makes the maximum progress it can given the mass it controls.  Let $\Delta^E(t) = \mathbf{E(M_t)} - \mathbf{E(\nu(q_t))}$.  This captures the mass of relocating drivers beyond that present in $\nu(q_t)$.  The following lemma, whose proof is in \Appr{proofs},  shows that this provides a lower bound on the progress made by the algorithm.
\begin{lemma}\Leml{nondecreasingq}
$q_{t+1} - q_{t} \ge \Delta^{E}(t)$.
\end{lemma}


When $\Delta^{E}(t)$ is large, which requires many drivers to be relocating, \Lemr{nondecreasingq} guarantees that we make rapid progress toward our goal.  It also guarantees that our dynamic relocation policy has an attractive welfare property: because we make progress at least equal to the mass of drivers relocating who would not also be relocating under $\nu(q_t)$ (and also under $M^*$), we end up paying the cost of extra relocation at most once for each driver.  Thus, the total excess mass of empty rides $\sum_t (\mathbf{E(M_t)} - \mathbf{E(M^*)})$ is at most $\mathbf{M^*}$, as the following observation, whose proof is in \Appr{proofs}, shows.

\begin{Observation}\Obl{convergence}
$\sum_t (\mathbf{E(M_t)} - \mathbf{E(M^*)}) \le \mathbf{M^*}$.
\end{Observation}


We are aware of no prior approach that provides such a guarantee.  It is easy to see, for example, that the static policy $\pi$ can have more than one extra relocation per driver. (Construct an example where all the drivers start in the same region and essentially all relocate in the first step but this is not the fixpoint.)  Many other prior approaches are heuristic and do not even provide a guarantee of convergence.  
%#############################################################

%\input{didi_simulations}
\section{Simulations} \Secl{didi}

Our simulations are based on a dataset from Didi from 2016 for an unspecified region in China that was processed by \citet{braverman2019empty} into a form suitable for our model, representing nine major regions of the city.  For completeness we provide the model parameters in \Appr{didi}. 
Notably, these include non-uniform distances between regions.  While our theoretical analysis assumed unit travel times, our constructive approach is easily adapted to this richer setting.  See \Appr{didi} for a discussion.
\citet{braverman2019empty} perform their experiments in a large continuous-time system while we apply our discrete time model using 15-minute intervals. Our results are similar which validates our discrete approach.  The results of both this recreation and a variant we introduce show that our dynamic relocation policy outperforms prior approaches in this setting.

\subsection{What we compare}

 We compare our approach, which we refer to as \dynamic, with four other policies:

\begin{description}
    \item[\static.]  This is a static policy that  sets $\pi_t = \pi$ for all $t$.  From \Crlr{limit} it follows that \static  guarantees convergence to the fixpoint, yet, as we pointed out, it may do so slowly.  Thus \static represents a baseline in the absence of a more sophisticated dynamic policy.
    \item[\greedy.] This is a greedy policy that  distributes the relocating mass proportional to the unmet demand in each region with a one-step look ahead. That is, it takes $\pi_t[i] \propto \maxz{\sigr{W} - \sigc{F(M_t)}}$, which   guarantees that as many relocating drivers as possible will have a passenger at time $t+1$ while spreading them among the regions where they can be useful.  As \greedy does not depend on $\pi$, it may not converge to the fixpoint, but it does provide a meaningful baseline for other metrics based purely on the provision of service.
    \item[\lka.] This is a heuristic proposed by \citet{braverman2019empty} that forms a model based on the average of demand patterns over a lookahead window and finds the optimal static policy for this average.  For the lookahead, we use 2, 3, and 4 steps.
    \item[\hmr.] We adapt the dynamic policy of \citet{hosseini2021dynamic}, which dispatches a single car at a time, to our setting.
    In particular, their algorithm computes a measure of which region will generate the most long-run service and sends the car there.  Since the results of this computation do not change until a region is saturated, we adapt their policy by assigning relocating drivers to this region until (a) it becomes saturated or (b) the mass reaches the fixpoint mass of the region.
 
 
\end{description}

\subsection{Results}

Our first experiment is based on a similar experiment by \citet{braverman2019empty} and uses the same parameters.  We simulate the system for four hours.  Initially the supply of drivers is large relative to the demand for rides, so almost all requests can be served.  However, after two hours demand increases sharply and passengers' destinations (i.e., $V$) are randomly permuted.  Table~\ref{tab:braverman4} shows the resulting availability averaged over 20 runs.  Availability is a standard metric in this setting and is the average fraction of demand served across regions.  (See \Appr{simulations} for more discussion of this and other metrics.)

Our results for \static and \lka are qualitatively similar to those reported by \citet{braverman2019empty} in their Table 4.  In the first two hours almost all demand is served while the \static~performance drops with the demand transition as it is slow to adapt to the new demand pattern.  In contrast, \lka~with lookaheads of 2, 3, and 4 steps (30, 45, or 60 minutes) can anticipate the change and has a smoother transition.  The other three approaches (\hmr, \greedy, and \dynamic) do not anticipate the change but simply adapt to it rapidly.  \hmr and \greedy achieve similar performance to \lka, while our \dynamic~adapts the fastest and outperforms the alternatives.

Our second experiment extends this approach by keeping the overall level of demand ($\bm{W}$) fixed while permuting the pattern of demand as in the previous experiment.  We permute the demand 19 evenly-spaced times over the course of the experiment and report results averaged over 20 runs.  
We treat \static as a baseline and report the percentage change in availability relative to it for each of the other policies.
\Figr{fig:didi2} shows two variations. On the left, we fix the ratio between supply ($\bm{M}$) and demand ($\bm{W}$) at 1.15 and vary the number of steps between demand changes. 
We start this variation from  5 steps, which corresponds to updates every 75 minutes, as we think smaller values represent unreasonably fast changes of demand.
In contrast to our first experiment, \lka only slightly outperforms \static.  Intuitively, while \lka can anticipate future demand patterns, without a significant excess of drivers its ability to reposition them for future demand while still serving current demand is limited.  Since \dynamic simply adapts to changing demand patterns rapidly, it remains highly effective although the benefits decrease as changes in demand become less frequent.
Neither \greedy nor \hmr performs well in this setting relative to \static.

On the right, we fix the number of steps between demand changes at 10 and vary the ratio between supply and demand.  The performance of all policies improves relative to \static as the supply of drivers increases.  \greedy, as a simple baseline, is still outperformed by the other approaches.  However, now \hmr~does better than the lookahead approaches at sufficiently high levels of supply.
\iffalse
\begin{table}
\begin{center}
\begin{tabular}{|c|c |c| c| c |c|}
\hline
& Hour1& Hour2& Hour3& Hour4& 4-hour Total\\
\hline
CON&  1.000& 1.000& 0.937& 0.938& 0.969\\
\hline
HMR&  1.000& 1.000& 0.888&  0.918& 0.952\\
\hline
GDY&  1.000& 1.000& 0.912&  0.915& 0.957\\
\hline
LKA2&  0.983& 0.995& 0.858&  0.929& 0.941\\
\hline
LKA3&  0.982& 0.991&  0.882& 0.930& 0.946\\ 
\hline
LKA4&	0.983& 0.990& 0.897& 0.931& 0.950\\ 
\hline
STA&	0.983& 0.998& 0.814& 0.929& 0.931\\ 
\hline
\end{tabular}
\end{center}
\caption{Availability improvement in a 4 hour simulation with single demand change \label{tab:braverman4}}
\end{table}
\fi

\begin{table}
\begin{center}
\begin{tabular}{|c|c |c| c| c |c|}
\hline
& Hour1& Hour2& Hour3& Hour4& 4-hour Total\\
\hline
CON&  1.000& 1.000& 0.937& 0.938& 0.969\\
\hline
GDY&  1.000& 1.000& 0.912&  0.915& 0.957\\
\hline
HMR&  1.000& 1.000& 0.888&  0.918& 0.952\\
\hline
LKA4&	0.983& 0.990& 0.897& 0.931& 0.950\\ 
\hline
LKA3&  0.982& 0.991&  0.882& 0.930& 0.946\\ 
\hline
LKA2&  0.983& 0.995& 0.858&  0.929& 0.941\\
\hline

STA&	0.983& 0.998& 0.814& 0.929& 0.931\\ 
\hline
\end{tabular}
\end{center}
\caption{Availability improvement in a 4 hour simulation with single demand change \label{tab:braverman4}}
\end{table}




\begin{figure*}
    \centering
    \begin{subfigure}{0.49\textwidth}
      \includegraphics[scale=0.5]{Figures/DiDi/steps_before_demandChange_every5_15_steps_repeat20.png}
   \end{subfigure}    
  \begin{subfigure}{0.49\textwidth}
        \includegraphics[scale=0.5]{Figures/DiDi/changeMass_changedemand_every10_rep1_welfarelarge_overstatic_ave_repeat10.png}
   \end{subfigure}
    \caption{Comparison of Policies with STA \Figl{fig:didi2}}
\end{figure*}

\subsection{Additional Simulations}

In \Appr{simulations} we present the results of additional simulations on synthetic data.  They show that with static demand patterns \dynamic consistently converges substantially faster than other policies and has a performance that is often close to or matching a lower bound.  The effect of this on efficiency (the mass of passengers served) relative to the other policies is, however, quite small.  When targeting an objective for selecting an optimal fixpoint that puts weight on fairness rather than just efficiency, \dynamic leads to economically meaningful improvements in availability, showing its ability to target a wider range of objectives than previous approaches.
%#############################################################

%\input{discussion}
\section{Discussion} \Secl{discussion}

We have studied a model of relocation policies for ridesharing platforms and given a constructive characterization of the unique fixpoint of system dynamics.  Using this construction, we designed a dynamic policy that provides guarantees about its rate of convergence to the fixpoint and analyzed the magnitude of these benefits in simulations.


%Regarding our contribution, 
%Suppose you were operating such a platform. There are many things about your platform you control: the relocation policy, the price of rides (which indirectly affects $W$), how much drivers are paid (which indirectly affects $q$). You would naturally have questions about how to use these controls to achieve business goals, both on average in some static version of the problem and with finer-grained control in a dynamic version of the problem. Prior work has provided non-constructive answers to particular questions of this sort. In contrast, our results provide a constructive foundation for the analysis of such systems that can be used to address a range of questions. Our dynamic relocation algorithm demonstrates the effectiveness of our theory in addressing one concrete question (dynamic relocation policies) and we believe it useful for many other applications as well. For example, it could also be used to provide insights about how changing prices (and thus $W$) affect the overall efficiency and profitability of the system by using the construction to examine the resulting changes in the fixpoint.

To obtain these results, we used a stylized model. We conclude by discussing the extent to which our results extend to richer models.  Within our basic setup we made use of several assumptions.
First, that drivers do not relocate if there are waiting passengers.  This is typically a mild assumption in models of spatial demand imbalances, but would be more relevant in a study of temporal ones~\citep{ma2019spatio}.
Second, that drivers carry passengers regardless of their destination.  This is a common requirement, but some work has investigated the benefits from allowing strategic passenger selection \citep{afeche2018ride}.
Third, that there is a positive demand between every two regions.   This assumption guarantees ergodicity, and in many of our results could likely be reduced to that weaker requirement.  It is also relevant for our convergence rate analysis of dynamic policies, although there it could likely be replaced by an eigenvalue-based bound, as is typical in the analysis of mixing time of Markov chains.
This assumption seems quite reasonable in practice: All it requires is that, for a reasonable decomposition of a city into regions, people occasionally wish to travel from any given part of the city to any other. Unless this grid of regions is very fine this seems quite likely.
Fourth, that the relocation policy is the same for every region.  We believe our results can be obtained without it, albeit with substantial additional notational clutter.  For our experiments on the Didi data, our results demonstrate empirically the effectiveness of this extension.

There are also a number of features our basic model excludes.  
We work with a continuum of drivers, but previous work has shown that dynamics with a continuum of drivers approximately hold with discrete drivers as the number of drivers grows~\citep{braverman2019empty,banerjee2017pricing}.  %\emph{I don't understand this. how second part implies that results hold in a discrete model.}
We also assume that time is discrete and all journeys take a single unit of time.  As in the simulations, discrete but non-uniform journey times can be modeled by introducing additional ``regions'' which represent drivers in transit, and our results likely generalize.\footnote{This would involve relaxing the previously discussed positive demand assumption.}  Adding travel times to the linear programming approach is straightforward, and previous work has observed that, as long as the platform controls the relocation policy, non-uniform travel times do not significantly affect the behavior of the model~\citep{bimpikis2019spatial}.
Journey times of a single pair of regions may vary over time. A key takeaway from prior work is that as long as the number of vehicles is large this factor is essentially irrelevant, which is why we did not include it in our model or experiments~\citep{braverman2019empty}. 


While we do not explicitly consider a dynamic willing demand matrix $W$, it is the motivation for our results in \Secr{convergence}, which provide guarantees on convergence speed towards the fixed point. In particular, policies that are slow to adapt can be stuck, never approaching the fixpoint before $W$ changes. In contrast, our dynamic policy can deal with a changing demand matrix by converging rapidly before the change is so large that the target becomes outdated. Our simulations on synthetic data (the left subfigures of \Figr{40-m-u}, \Figr{40-f-p}, \Figr{regions}, and \Figr{10-a-u} in the appendix) show that the convergence towards the fixed point is indeed very fast and is generally faster than an expected substantial change of the demand. Table~\ref{tab:braverman4}, which is drawn from real scenarios, shows similarly rapid adaptation (note the availability in hour 3, after the change in $W$, is nearly identical to hour 4 for \dynamic while most other methods do substantially better in hour 4 than hour 3).    Both subfigures of \Figr{fig:didi2} also deal with dynamic $W$ and the results demonstrate the superior performance of our dynamic policy.

Our constructive algorithm myopically attempts to make as much progress toward the fixpoint as possible.  An interesting direction for future work would be to treat the problem of dynamic relocation as a planning or reinforcement learning problem.
%we agree and think that our work both lays the foundation for such approaches and can be viewed as a simple-yet-surprisingly-effective version of them, In particular, 
Our theoretical results can be viewed as providing a characterization of key aspects of a model to enable model-based approaches. One could imagine adding an approach based on Monte Carlo Tree Search or similar techniques~\citep{sutton2018reinforcement}. From this perspective, the lower bound (\lb) we compare to in some of the additional experiments in the appendix is the optimal plan for a relaxed version of the problem and in those experiments \dynamic shows near-optimal performance.

 %One natural direction is to consider settings where drivers have partial or total control over relocation decisions, and thus the sequential decision problem is faced by drivers rather than the platform. Our results provide key characterizations that justify formulating the problem from the perspective of a single driver in terms of the behavior at the fixpoint induced by the behavior of other drivers, an approach which has been fruitful in other game-theoretic work on marketplaces.

Finally, we do not model prices or monetary relocation costs.  As these do not affect the system dynamics, most of our results would be unchanged.  The primary effect would be to adjust the objective of problem OA %\emph{what's OA}
(\Figr{OA})
accordingly.
Nor do we consider settings where drivers have partial or total control over relocation decisions, and thus the sequential decision problem is faced by drivers rather than the platform.
Of course, a major role of prices is to influence the relocation decisions of rational drivers.
Our results provide key characterizations that justify formulating the problem from the perspective of a single driver in terms of the fixpoint induced by the behavior of other drivers, an approach which has been fruitful in other game-theoretic work on marketplaces~\citep{kash2015equilibrium}.
%##############################################################

\begin{acknowledgements} 
This material is based upon work supported by the National Science Foundation Program on Fairness in AI in collaboration with Amazon under Grant No. 1939743, by the National Science Foundation under Grant No. 1918429, and by the Discovery Partners Institute (DPI) Science Team Seed Grant Program.  DPI is part of the University of Illinois System. 
\end{acknowledgements}

%\bibliographystyle{ACM-Reference-Format}
%\clearpage
\bibliography{kash_613}
%\clearpage
%\appendix
%\input{dynamic_experiment}
%\input{experiment_appendix}
%\input{proofs}
\end{document}




