\documentclass[
  journal=proceedings,
  manuscript=article-type,
  year=2024
]{PMET_proc}

\usepackage{amsmath}
\usepackage[nopatch]{microtype}
\usepackage{booktabs}

% from Michael John's version of proceedings chapter
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{subcaption} % for subfigures

\newcommand{\indic}{\mathbb{I}}


\title{Bot detection: Simulations and application in people-centered health measurement surveys with missing data}
\author{Carl F. Falk}
\affiliation{Department of Psychology, McGill University, Montreal, Quebec, Canada}
\email[C. F. Falk]{carl.falk@mcgill.ca}

\author{Michael John Ilagan}
\affiliation{Department of Psychology, McGill University, Montreal, Quebec, Canada}

\author{Mathilde G. E. Verdam}
\affiliation{Department of Methodology \& Statistics, Institute of Psychology, Leiden University, Leiden, Netherlands}

\author{Richard Sawatzky}
\affiliation{School of Nursing, Trinity Western University, Langley, British Columbia, Canada\\
Centre for Advancing Health Outcomes, Providence Health Care Research Institute, Vancouver, British Columbia, Canada\\
Institute of Health and Care Sciences, and Centre for Person-Centered Care (GPCC), Sahlgrenska Academy, University of Gothenburg, Gothenburg, Sweden}
\addbibresource{references.bib}

\keywords{Patient-reported outcome measures, Person-centered measurement, Survey bots, Careless responding, Machine Learning, Missing Data, Permutation test} %% First letter not capped

\begin{document}

\begin{abstract}
In the context of improving the measurement of pain and emotional well-being among diverse populations, we sought to detect random responders or survey bots that are not responsive to item content. We adapted the L1P1 algorithm by \citet{l1p1}, which uses a permutation test and outlier statistics to compute a $p$-value and do classification under the null that the response vector is exchangeable. As the response options for the Likert-type items could yield missing data, simulations evaluated two variants of outlier statistic computations that used the expectation-maximization algorithm: one in which means and covariances were pre-computed and re-used for all rows, and another in which a leave-one-out approach was used. Results indicated that the L1P1 algorithm works as expected, but a leave-one-out strategy works best, and respondents with few completed items are flagged at higher rates due to loss of specificity. Based on simulations, we then performed classification for an empirical dataset ($N=11{,}197$) with 76 Likert-type items. Flagging rates were similarly higher for respondents with fewer completed items, but otherwise low. We therefore expect that random responders would likely not have strong influence on subsequent analyses for this measurement project.
\end{abstract}

\noindent
The equitable people-centered health measurement (EPHM) project aims to improve the measurement of pain and emotional well-being among diverse populations \citep[e.g.,][]{Sawatzkyetal:2024:abstract}.\footnote{Future updates and publications about the project will be posted to: \url{https://www.healthyqol.com}} Towards this end, pain and well-being items \citep{kopecetal2006} are measured in a calibration sample and a mixture item response model is developed for each item bank to incorporate heterogeneity in the measurement parameters. Subsequently, the relation between class probabilities and a host of social determinants of health variables is investigated. The results of these models are then carried forward to develop computer adaptive tests (CAT) based on the mixture model \citep[for supporting research, see][]{sawatzkyetal2018, sajobietal2022, Sawatzkyetal:2024:abstract, Sawatzky:2012}. It is hoped that this approach will help personalize health measurements by tailoring the selection and scoring of items for individuals.

Since a large and diverse calibration sample was desired, the project team collected online data from various sources. However, any online data collection endeavor must worry about contamination with participants who do not take the study seriously, and there is persistent worry regarding computer generated responses by survey bots \citep{perkel2020}. Recent articles review strategies to deal with such bots, including deterrence/prevention as well as detection \citep[e.g.,][]{simone2019, storozuketal2020}. Some steps were taken to prevent survey bots at the time of data collection (e.g., the survey was not widely distributed on social media), and some information was also made available for detection (e.g., checking postal code versus self-reported location).

Here we focus only on efforts to help ensure the integrity of the EPHM calibration sample by detailing an additional flag for random responders or survey bots based on the work of \citet{l1p1}. Under the assumption that Likert-type items are exchangeable for random responders or survey bots, these authors' L1P1 algorithm calibrates sensitivity for detecting such aberrant responses using an unsupervised classification approach. However, using this algorithm with the EPHM data required some modifications for use with missing data. In the remainder of this manuscript, we first describe some details of the EPHM calibration data as it relates to L1P1. We then describe two strategies for adapting L1P1 to the case of missing data, followed by simulations evaluating these two approaches. Finally, we provide results of flagging rates by L1P1 on the actual EPHM calibration sample.

\section{Motivation: EPHM calibration data}

The main focus of measurement consisted of pain and emotional well-being item banks developed by \citet{kopecetal2006}. Included were the original item banks (including items that were removed from the final item banks due to poor fit or suspected differential item functioning).
Given this setup, a measurement model could be designed to detect random responding \citep[e.g., ][]{jinetal2018, ulitzschetal2022bjmsp}. However, we thought it more expedient to use L1P1 as it does not require a known measurement model for humans and it would be quicker to study with a large sample or in simulations. 

Based on \citet{falketalpm}, we would expect L1P1 to perform well for EPHM data.
As L1P1 was initially developed to handle items with the same number of response options \citep{l1p1}, we used just the 76 items (out of 84) with 5 ordinal response options (35 pain, 41 emotional well-being).\footnote{L1P1 has been recently adapted to be able to accommodate inventories with a different number of response options \citep{ilaganimps}.}
In simulations with real measurement instruments and 5 category items, L1P1 achieved good classification accuracy ($>90\%$) for inventories with greater than 50 items \citep{falketalpm}.
Simulations also suggest that uniform information functions (as opposed to peaked information) are good, which appears to be the case for the item banks considered \citep{kopecetal2006}.

However, the survey also included ``Prefer not to answer'' and ``Do not know'' options for the Likert-type items.
For our purposes, these responses were coded as missing data.
In addition, some respondents did not complete all survey items or did not finish the questionnaire. As a result, we had data from 11,197 respondents who had at least one complete response on the 76 items, with a total of 10.1\% missing data (Figure~\ref{fig:misspat}).
Of these respondents, data were collected from an online survey panel ($N=5{,}336$), from partner health organizations ($N=4{,}891$), and through a mix of other means ($N=970$).
Since L1P1 does not readily handle missing data, we pursue a modification of it in the following section. %new

\begin{figure}[hbt!]
\centering
\includegraphics[width=0.95\linewidth]{images/MissPat.pdf}
\caption{Missing data patterns}
\label{fig:misspat}
\end{figure}

\section{L1P1 and missing data}

Let $i = 1, \ldots, n$ and $j = 1, \ldots, m$ index respondents and items, respectively, with $z_{ij} \in \{1, 2, \dots, k_j\}$ the observed response to item $j$ for respondent $i$, $\mathbf{z}_i = \begin{bmatrix} z_{i1}, & z_{i2}, & \ldots, & z_{im} \end{bmatrix}^{\top}$ respondent $i$'s full response pattern, and $\mathbf{Z}$ stacking all $n$ response patterns row-wise. Let $y_i \in \{0, 1\}$ be a true class label, with 0 indicating a diligent human and 1 indicating a random responder.

For respondent $i$, classification can be done using the following strategy:

\begin{enumerate}
    \item Randomly permute $\mathbf{z}_i$, to create $B$ new response patterns: $\mathbf{z}_i^{(1)}, \mathbf{z}_i^{(2)}, \ldots, \mathbf{z}_i^{(B)}$.
    \item For $\mathbf{z}_i$ and its $B$ permutations, compute outlier statistics, $\mathbf{x}_i$, and $\mathbf{x}_i^{(1)}, \mathbf{x}_i^{(2)}, \ldots, \mathbf{x}_i^{(B)}$. More than one outlier statistic per response pattern may be computed.
    \item Collapse outlier statistics to a single dimension, $r_i$ and $r_i^{(1)}, r_i^{(2)}, \ldots, r_i^{(B)}$, and rank-order in terms of suspiciousness. The value $p_i$ indicates the proportion of permutations that are at least as non-suspicious as $\mathbf{z}_i$, so that a small $p_i$ is evidence against exchangeability (i.e., against random responding).
    \item Classify using some threshold $\tau$, $\hat{y}_i = \indic \{ p_i \geq \tau \}$, where $1-\tau$ corresponds to the desired sensitivity (e.g., $\tau=.05$ is 95\% sensitivity).
\end{enumerate}

L1P1 is unique in several respects. At step 1 it assumes random responders have response vectors whose values are exchangeable. Knowledge of the exact data generating mechanism for random responders is not explicitly required, nor is it required for diligent humans.

In addition, step 2 involves computation of outlier statistics.
Two examples are Mahalanobis distance \citep{mahalanobis1936, zijlstraetal2011} and person-total cosine similarity,\footnote{Whereas person-total correlation is the Pearson correlation between a response vector, $\mathbf{z}_i$, and some mean reference vector, e.g., $\hat{\boldsymbol{\mu}}$, person-total cosine similarity is the cosine similarity between these two vectors.} which require an estimate of the item means, $\hat{\boldsymbol{\mu}}$, and covariances, $\hat{\boldsymbol{\Sigma}}$.
For instance, Mahalanobis distance for person $i$ is: % new
\begin{equation}
\label{eq:md}
\sqrt{(\mathbf{z}_i - \boldsymbol{\mu})^{\top}\boldsymbol{\Sigma}^{-1}(\mathbf{z}_i - \boldsymbol{\mu})}
\end{equation}
and person-total cosine similarity is: % new
%\begin{equation}
%\frac{z_i^{\top}\mu}{\sqrt{z_i^{\top}z_i} \sqrt{\mu^{\top} \mu} }
%\end{equation}
\begin{equation}
\label{eq:ptcs}
\frac{\mathbf{z}_i^{\top}\boldsymbol{\mu}}{\lVert \mathbf{z}_i \rVert \lVert \boldsymbol{\mu} \rVert}
\end{equation}
where the numerator contains an inner product and the denominator contains Euclidean norms. Thus, $\mathbf{x}_i$ may be a vector of length two that contains Mahalanobis distance and person-total cosine similarity.
Importantly, L1P1 uses a leave-one-out strategy where $\mathbf{Z}_{-i}$, the original $\mathbf{Z}$ omitting observation $i$, is used to compute $\hat{\boldsymbol{\mu}}_{-i}$ and $\hat{\boldsymbol{\Sigma}}_{-i}$, which are then used to compute $\mathbf{x}_i$, and $\mathbf{x}_i^{(1)}, \mathbf{x}_i^{(2)}, \ldots, \mathbf{x}_i^{(B)}$, from $\mathbf{z}_i$ and $\mathbf{z}_i^{(1)}, \mathbf{z}_i^{(2)}, \ldots, \mathbf{z}_i^{(B)}$.
For example,  $\hat{\boldsymbol{\mu}}_{-i}$ and $\hat{\boldsymbol{\Sigma}}_{-i}$ are substituted for $\boldsymbol{\mu}$ and $\boldsymbol{\Sigma}$ in (\ref{eq:md}) and (\ref{eq:ptcs}).
At step 3, outlier statistics may be collapsed to a single dimension by computing the distance to some ideal, non-suspicious point.\footnote{Any distance metric may suffice, though we used an equation that itself resembled Mahalanobis distance, encompassing the covariance among outlier statistics $\mathbf{x}_i$ and $\mathbf{x}_i^{(1)}, \mathbf{x}_i^{(2)}, \ldots, \mathbf{x}_i^{(B)}$. For more details, we refer to \citet{l1p1}.} For example, $(0, +1)$ are the least suspicious values for Mahalanobis distance and person-total cosine similarity. The distance between $(0, +1)$ and $\mathbf{x}_i$ is represented by $r_i$.

To handle missing data, we pursue modifications to steps 1 and 2. First, step 1 permutes $\mathbf{z}_{i,c}$ to obtain new response patterns, where $\mathbf{z}_{i,c}$ is respondent $i$'s response vector containing only their complete responses. Second and at step 2, under ignorable missing data mechanisms, consistent estimates of means and covariances of the items can often (though not always) be obtained using direct maximum likelihood under the assumption of multivariate normality \citep{yuan2009}. Under the original L1P1 algorithm, leave-one-out would be used for each permutation test. We may omit observation $i$ when obtaining $\hat{\boldsymbol{\mu}}_{-i}$ and $\hat{\boldsymbol{\Sigma}}_{-i}$ to maximize the log-likelihood:
\begin{equation}
\begin{aligned}\label{eq:ll2}
l_{-i}(\boldsymbol{\theta}) = \sum_{i'\ne i}^n \left( -\frac{1}{2}\log|\boldsymbol{\Sigma}_{i',c}(\boldsymbol{\theta})| - \frac{1}{2}(\mathbf{z}_{i',c} - \boldsymbol{\mu}_{i',c}(\boldsymbol{\theta}))^{\top} \boldsymbol{\Sigma}_{i',c}(\boldsymbol{\theta})^{-1} (\mathbf{z}_{i',c} - \boldsymbol{\mu}_{i',c}(\boldsymbol{\theta})) - \frac{1}{2} m_{i'} \log(2\pi) \right)
\end{aligned}
\end{equation}

\noindent where $\boldsymbol{\theta}$ is a vector of parameters (all means and covariances), $\boldsymbol{\mu}_{i,c}(\boldsymbol{\theta})$ and $\boldsymbol{\Sigma}_{i,c}(\boldsymbol{\theta})$ are partitions of $\boldsymbol{\mu}(\boldsymbol{\theta})$ and $\boldsymbol{\Sigma}(\boldsymbol{\theta})$ that correspond to complete observations for respondent $i$, and $m_i$ is their number of complete responses.

Alternatively, since $n$ is large and we need to perform many permutation tests, we may obtain estimates $\hat{\boldsymbol{\mu}} = \boldsymbol{\mu}(\hat{\boldsymbol{\theta}})$ and $\hat{\boldsymbol{\Sigma}} = \boldsymbol{\Sigma}(\hat{\boldsymbol{\theta}})$ just once that maximize the log-likelihood for all respondents:
\begin{equation}
\begin{aligned}\label{eq:ll1}
l(\boldsymbol{\theta}) = \sum_{i=1}^n \left( -\frac{1}{2}\log|\boldsymbol{\Sigma}_{i,c}(\boldsymbol{\theta})| - \frac{1}{2}(\mathbf{z}_{i,c} - \boldsymbol{\mu}_{i,c}(\boldsymbol{\theta}))^{\top} \boldsymbol{\Sigma}_{i,c}(\boldsymbol{\theta})^{-1} (\mathbf{z}_{i,c} - \boldsymbol{\mu}_{i,c}(\boldsymbol{\theta})) - \frac{1}{2} m_i \log(2\pi) \right)
\end{aligned}
\end{equation}

In either case, although the full $\hat{\boldsymbol{\mu}}$ (or $\hat{\boldsymbol{\mu}}_{-i}$) and $\hat{\boldsymbol{\Sigma}}$ (or $\hat{\boldsymbol{\Sigma}}_{-i}$) are obtained for all items, only the complete subset of elements for respondent $i$ are used to compute outlier statistics, $\mathbf{x}_i$, and $\mathbf{x}_i^{(1)}, \mathbf{x}_i^{(2)}, \ldots, \mathbf{x}_i^{(B)}$, from $\mathbf{z}_{i,c}$ and its $B$ permutations.

For both strategies, we obtain estimates using software with the expectation-maximization (EM) algorithm \citep{EMgaussian, Stadler:2012}. When $\hat{\boldsymbol{\mu}}$ and $\hat{\boldsymbol{\Sigma}}$ are re-used for each permutation test, we refer to this as ``pre-computed'', and when leave-one-out is used for each permutation test we refer to this as ``LOO''. The pre-computed strategy slightly violates the premises of the L1P1 algorithm, which could result in a loss of sensitivity. However, it is unclear whether this would occur at such a large sample size. Pre-computing also takes much less time as the item means and covariances only need to be computed once, whereas under LOO Equation~\ref{eq:ll2} needs to be maximized for each row in the dataset.

\section{Simulations}

We conducted a small set of simulations to evaluate algorithm performance for classification of random responders specifically for conditions similar to the EPHM data. We also wanted to evaluate the two strategies for computing item means and covariances under missing data.

\subsection{Data generation and analysis}

To mimic EPHM calibration data, the total sample size for each generated dataset was fixed at 11,197. We manipulated random responder contamination rate (.05, .5, and .95). We generated hypothetical human data by utilizing estimated model parameters for EPHM data based on a 2-dimensional graded response model with a logit link function \citep{Reckase:2009:B, Samejima:1969}, and with very simple structure utilizing all 84 items for both pain and emotional well-being item banks.\footnote{Reverse-worded items were reverse-coded prior to these analyses.} Only item parameters from the 76 5-category items were then used. These model parameter estimates were treated as true values in generating hypothetical responses when $y_i = 0$. Although not corresponding to a mixture model and possibly contaminated with random responders, parameter estimates from this analysis looked reasonable in light of work by \citet{kopecetal2006}. Thus, we argue this is a reasonable strategy for generating hypothetical humans. For $y_i=1$, each item response was drawn from a uniform distribution over the 5 possible response categories. Once a complete dataset with the desired proportion of humans and bots was generated, these rows were randomly sorted, and missing data was induced using the exact same missing data patterns as under the EPHM calibration sample.

We generated 100 datasets for each contamination rate and analyzed them using the pre-computed strategy. Due to computational time, we generated 20 datasets for each contamination rate for use with the LOO strategy. For both strategies, $B=1{,}000$ permutations were used and person-total cosine similarity and Mahalanobis distance were used as outlier statistics. Since we later wished to be cautious about accidentally flagging humans on the real EPHM data, we used $\tau = .1$ for 90\% sensitivity as we expected it would exhibit more specificity than $\tau=.05$ as used by \citet{l1p1}. Custom R code and results for these simulations are available on the Open Science Framework: \url{https://osf.io/t7br2/}.

\subsection{Results}

We report three calibration metrics: sensitivity ($\sum_{i=1}^n  \indic \left\{\hat{y}_i = y_i = 1\right\}/\sum_{i=1}^n \indic \left\{y_i = 1\right\}$), specificity ($\sum_{i=1}^n  \indic \left\{\hat{y}_i = y_i \allowbreak = 0\right\}/\sum_{i=1}^n \indic \left\{y_i = 0\right\}$), and classification accuracy ($\frac{1}{n}\sum_{i=1}^n  \indic \left\{\hat{y}_i = y_i\right\}$).
Each was averaged across all datasets for all cells of the design, but also binned by the number of complete responses available for any given row.

LOO was able to maintain the target sensitivity as it had around 90\% sensitivity across all contamination rates and across rows with different amounts of missing data (bottom of Figure~\ref{fig:Sens}). However, pre-computing experienced a loss of sensitivity, especially under lower contamination and when the row had more complete responses (top of Figure~\ref{fig:Sens}). Presumably, a more complete response pattern may allow the row to influence mean and covariance estimates, making it look less suspicious.

\begin{figure}[hbt!]
\centering
\includegraphics[width=0.8\linewidth]{images/Sens.pdf}
\caption{Sensitivity}
\label{fig:Sens}
\end{figure}

While specificity looked similar across pre-computing and LOO, it only achieved acceptable rates ($>90\%$) with 20--30 complete responses or more, and when contamination was .5 or .05 (Figure~\ref{fig:Spec}).

\begin{figure}[hbt!]
\centering
\includegraphics[width=0.8\linewidth]{images/Spec.pdf}
\caption{Specificity}
\label{fig:Spec}
\end{figure}

Classification accuracy tended to be slightly better under LOO than it was under pre-computing (Figure~\ref{fig:Acc}), though in some cases accuracy was the same or differed by only a percentage point.

\begin{figure}[hbt!]
\centering
\includegraphics[width=0.8\linewidth]{images/Acc.pdf}
\caption{Accuracy}
\label{fig:Acc}
\end{figure}

Overall then, LOO should be slightly preferred as it may flag a few more random responders than will pre-computing, but either strategy should yield rather similar results. 

\section{Empirical results}

On the actual EPHM data, we applied L1P1 both with pre-computed means and covariances and with LOO, using $B=1{,}000$ and $\tau=.1$ for 90\% sensitivity. We report results of LOO since it performed slightly better in simulations and resulted in flagging only an additional 16 respondents. The number of flagged respondents (i.e., $\hat{y}_i = 1$) was also binned by the number of complete responses (Table~\ref{tbl:EPHMflag2}). For some respondents a flag could not be generated and was marked as ``NA'' for missing; this typically occurred when the respondent utilized only one or two response categories across all completed items and also when the respondent completed very few items (Table~\ref{tbl:EPHMflag2}). Flagging rates also tended to be higher when the respondent used few response categories (Table~\ref{tbl:EPHMflag3}). In examining the data source, we noticed that the highest flagging rates were among the online survey panel participants, as opposed to those recruited by partner health organizations or other sources (Table~\ref{tbl:EPHMflag4}).

\begin{table}[hbt!]
\begin{threeparttable}
\caption{Flag rates for EPHM data by number of completed items}
\label{tbl:EPHMflag2}
\begin{tabular}{lrrrrr}
\hline
Complete Items & N & N Flag & Prop. Flag & N NA & Prop. NA\\
\hline
(0,10] & 415 & 172 & 0.41 & 174 & 0.42\\
\hline
(10,20] & 272 & 73 & 0.27 & 36 & 0.13\\
\hline
(20,30] & 245 & 42 & 0.17 & 17 & 0.07\\
\hline
(30,40] & 391 & 65 & 0.17 & 14 & 0.04\\
\hline
(40,50] & 108 & 10 & 0.09 & 4 & 0.04\\
\hline
(50,60] & 130 & 17 & 0.13 & 2 & 0.02\\
\hline
(60,70] & 251 & 23 & 0.09 & 1 & 0.00\\
\hline
(70,76] & 9385 & 254 & 0.03 & 65 & 0.01\\
\hline
Overall & 11197 & 656 & 0.06 & 313 & 0.03\\
\hline
\end{tabular}
\begin{tablenotes}[hang]
\item[]N = count or sample size; Prop. = Proportion; NA = Missing, no flag could be generated.
%\item[a]First note
%\item[b]Another table note
\end{tablenotes}
\end{threeparttable}
\end{table}

\begin{table}[hbt!]
\begin{threeparttable}
\caption{Flag rates for EPHM data by number of categories used}
\label{tbl:EPHMflag3}
\begin{tabular}{lrrrrr}
\hline
  & 1 & 2 & 3 & 4 & 5\\
\hline
$\hat{y}_i=0$ & 0 & 148 & 478 & 3498 & 6104\\
\hline
$\hat{y}_i=1$ & 0 & 260 & 178 & 104 & 114\\
\hline
\end{tabular}
\end{threeparttable}
\end{table}

\begin{table}[hbt!]
\begin{threeparttable}
\caption{Flag rates for EPHM data by data source}
\label{tbl:EPHMflag4}
\begin{tabular}{lrrrrr}
\hline
  & N & N Flag & Prop. Flag & N NA & Prop. NA\\
\hline
Online survey panel & 5336 & 490 & 0.09 & 230 & 0.04 \\
\hline
Partner health organizations & 4891 & 122 & 0.02 & 64 & 0.01 \\
\hline
Other & 970 & 44 & 0.05 & 19 & 0.02 \\
\hline
\end{tabular}
\begin{tablenotes}[hang]
\item[]N = count or sample size; Prop. = Proportion; NA = Missing, no flag could be generated.
%\item[a]First note
%\item[b]Another table note
\end{tablenotes}
\end{threeparttable}
\end{table}

\section{Conclusion}

L1P1 can be combined with modern ways to handle missing data such as the EM algorithm to compute means and covariances for use with outlier statistics. Doing this using LOO for each row appeared to perform best in simulations. Although the performance of L1P1 with real data from heterogeneous populations has been evaluated \citep{falketalpm, l1p1}, the present simulations could be improved by introducing some heterogeneity for simulated humans. Nonetheless, we sought quick answers as to whether such an algorithm was feasible for the real EPHM data. Handling of missing data is now available in a forked version of the \texttt{detranli} package \citep{pkg:detranlifalk} and may eventually be incorporated into the main repository.

Combining results from simulations and the actual EPHM analyses, application of L1P1 with LOO suggests that data collection was not overrun with random responders. Although 6\% of respondents were flagged and a flag could not be generated for a further 3\%, the majority of these respondents completed few items. Based on simulations, for those with few complete responses (e.g., fewer than 20--30), the results of L1P1 may not be trustworthy as many diligent humans may be accidentally flagged. Furthermore, respondents with few completed items would presumably have little influence on any subsequent analyses (i.e., development of the mixture CAT) as they would contribute few responses to the estimated model(s). Such a conjecture could be tested by performing a sensitivity analysis with and without flagged respondents.


\begin{acknowledgement}
We are grateful to the EPHM team, including researchers, healthcare providers, patient partners, and partner organizations who were part of the original project proposal and subsequent implementation: \url{https://webapps.cihr-irsc.gc.ca/decisions/p/project_details.html?applId=450658&lang=en}
\end{acknowledgement}

\paragraph{Funding Statement}

The EPHM project is supported by the Canadian Institutes of Health Research (CIHR), (Project Grant \#468626), and the Canada Research Chairs program (\# CRC-2022-00155).
We acknowledge the support of the Natural Sciences and Engineering Research
Council of Canada (NSERC), (funding reference number RGPIN-2018-05357 and
DGECR-2018-00083), and the Fonds de recherche du Qu\'{e}bec--Nature et technologies
(2022-PR-298903 {and 2023--2024-B2X-330469}).
Le projet EPHM est soutenu par les Instituts de recherche en santé du Canada (IRSC), [subvention de projet \#468626], et le programme des chaires de recherche du Canada (\# CRC-2022-00155).
Cette recherche a \'{e}t\'{e} financ\'{e}e par le Conseil de recherches en sciences naturelles et en g\'{e}nie du Canada (CRSNG), [num\'{e}ro de r\'{e}f\'{e}rence RGPIN-2018-05357 et DGECR-2018-00083] et les Fonds de recherche du Qu\'{e}bec--Nature et technologies (2022-PR-298903 {et 2023--2024-B2X-330469}).

\paragraph{Competing Interests}

None.


%\endnote in some journals will behave like \footnote; and \printendnotes will not output anything. 
%\printendnotes

\printbibliography

%\appendix

%\section{Example Appendix Section}

%Appendix goes here

\end{document}


%\section{Insert A head here}
%This template uses \texttt{biblatex} and adopts an APA referencing style. If you are using this template on Overleaf, Overleaf's build tool will automatically run \texttt{pdflatex} and \texttt{biber}. If you are compiling this template on your own local \LaTeX{} installation, please execute the following commands:
%\begin{enumerate}
%   \item \verb|pdflatex PMET_proc_template|
%    \item \verb|biber PMET_proc_template|
%   \item \verb|pdflatex PMET_proc_template|
%    \item \verb|pdflatex PMET_proc_template|
%\end{enumerate}

%\subsection{Insert B head here}
%\autocite{l1p1}
%\citep{l1p1}


%\section{Equations}

%%% Numbered equation
%\begin{equation}
%\begin{aligned}\label{eq:first}
%\frac{\partial u(t,x)}{\partial t} = Au(t,x) \left(1-\frac{u(t,x)}{K}\right)
% -B\frac{u(t-\tau,x) w(t,x)}{1+Eu(t-\tau,x)},\\
%\frac{\partial w(t,x)}{\partial t} =\delta \frac{\partial^2w(t,x)}{\partial x^2}-Cw(t,x)
%+D\frac{u(t-\tau,x)w(t,x)}{1+Eu(t-\tau,x)},
%\end{aligned}
%\end{equation}

%\begin{align}\label{eq:another}
%\begin{split}
%\frac{dU}{dt} &=\alpha U(t)(\gamma -U(t))-\frac{U(t-\tau)W(t)}{1+U(t-\tau)},\\
%\frac{dW}{dt} &=-W(t)+\beta\frac{U(t-\tau)W(t)}{1+U(t-\tau)}.
%\end{split}
%\end{align}


%%%% Unnumbered equation
%\begin{align*}
%&\frac{\partial(F_1,F_2)}{\partial(c,\omega)}_{(c_0,\omega_0)} = \left|
%\begin{array}{ll}
%\frac{\partial F_1}{\partial c} &\frac{\partial F_1}{\partial \omega} \\\noalign{\vskip3pt}
%\frac{\partial F_2}{\partial c}&\frac{\partial F_2}{\partial \omega}
%\end{array}\right|_{(c_0,\omega_0)}\\
%&\quad=-4c_0q\omega_0 -4c_0\omega_0p^2 =-4c_0\omega_0(q+p^2)>0.
%\end{align*}


%See Figure~\ref{fig_wide} for a double-column figure; this is always at the top of a following page.