
\appendix
% \section{Code and Data Availability}
% Our dataset is publicly available at \href{https://doi.org/10.5281/zenodo.14774037}{10.5281/zenodo.14774037}. Our code is publicly available at \href{https://github.com/weigertlab/cellcycle/}{github.com/weigertlab/cellcycle}.

\section{Supplementary Methods}
\subsection{Cell culture}
\Fucci-RPE1 cells, kindly provided by Battich et al.\ [2020], were cultured at 37°C with 5\% CO\textsubscript{2} in DMEM/F12 medium (Gibco 11320033), supplemented with 1\% non-essential amino acids (NEAA) (Gibco 11140-035), 1\% penicillin-streptomycin (Sigma-Aldrich G6784), and 10\% fetal bovine serum (FBS) (Gibco 10437-028). In addition, the H2B-iRFP marker, driven by a PGK promoter, was introduced into the cells using a second-generation lentiviral system with a commercially available plasmid (Addgene: 90237).

\subsection{Imaging}
For imaging, H2B-\Fucci-RPE1 cells were seeded into 96-well plates and cultured under the conditions described above, with Fluorobrite medium (Gibco A1896701) replacing DMEM/F12. For the perturbation experiments, cells were treated with 10 nM Palbociclib (CDK4/6 inhibitor). Images from four channels—Brightfield, H2B (far red), Cdt1 (red), and Geminin (green)—were acquired every 5 minutes using a PerkinElmer Operetta Microscope with a 20$\times$/0.80 objective (wide-field microscopy). Four or nine tiles per well were captured for each channel, with a 15\% overlap for subsequent stitching. In the images, 1 pixel equates to 0.5979761\,$\mu$m. The laser intensities and exposure times for each channel are shown in the table below.

\begin{table}[h]
    \centering
    \begin{tabular}{l c c}
        \hline
        Channel & Laser intensity & Exposure time \\
        \hline
        \Fucci Green & 25\% & 30\,ms \\
        \Fucci Red & 15\% & 10\,ms \\
        H2B Far red & 30\% & 30\,ms \\
        Brightfield & 50\% & 5\,ms \\
        \hline
    \end{tabular}
\end{table}



\subsection{Image preprocessing} 
Image preprocessing involved stitching the tiles~\cite{preibisch_globally_2009} and applying background subtraction to the fluorescent channels using a rolling-ball algorithm. Cell nuclei were segmented on the H2B channel with a custom StarDist model~\cite{weigert_nuclei_2022} and tracked across frames using TrackMate~\cite{tinevez_trackmate_2017}. Full cell cycle tracks (M-M, i.e., tracks encompassing one complete cell division cycle from one mitosis (M) to the next) were isolated using K-Means clustering of interpolated \Fucci signals. Our ground-truth labels were obtained by averaging the fluorescent channels over the nuclear area and taking the logarithm of this signal, combined with a smooth noise-removal step.
The raw fluorescent \Fucci signal is not normalized for background noise (starting at $2^{5}$) and exhibits a greater dynamic range in log scale, as previously shown in DeepCycle~\cite{rappez_deepcycle_2020}. However, taking the logarithm also increases the dynamic range of the background noise, leading to interesting questions about the proper scale of these tracks.
We average the pixels within the nucleus for each \Fucci marker. Then, for background noise normalization, we shift the signal by $\epsilon$ and express it in units of $\epsilon$, where $\epsilon$ can be the $k$-th percentile of the signal distribution. We use the 1st percentile, $\epsilon = P_1 = (P_{1,f_1}, P_{1,f_2})$. Then, to deal with the increased dynamic range from the logarithm, we apply the Softplus function with $\beta = 1$ to the signal in units of $\epsilon$ before taking the logarithm:
\[ F = (f_1, f_2), \quad \overline{F} = \dfrac{F}{A} = (\overline{f_1}, \overline{f_2}) \]
\[ F^\prime = \dfrac{\overline{F} - \epsilon}{\epsilon}, \quad \text{\Fucci} = \log_2\bigl(\text{Softplus}(F^\prime)\bigr) \]

Here, $F$ denotes the raw Fucci values obtained from imaging, summed over the pixels of each nucleus, $A$ is the area of the nucleus, and $\text{Softplus}(x) = \dfrac{1}{\beta} \log\bigl(1 + \exp(\beta x)\bigr)$.

\subsection{Identification of biological checkpoints}
To identify the time frame at which phase transitions occur in a sequence of intensities (both in real data and in the model's predictions), we devised a simple threshold-based method that accurately detects the onset of the Fucci green signal (G1/S) and the disappearance of the red signal (S/G2).  

\begin{itemize}
    \item The ``linear'' signal (not log-transformed) of both channels is normalized between 0 and 1 to ensure comparability across tracks.
    \item The signal is smoothed using a convolution with a window size of 20.
    \item For the green signal, we identify the first time point where the intensity crosses above the 0.05 threshold (5\% of its maximum intensity).
    \item For the red signal, we determine the transition point as the first time it drops below the 0.05 threshold (5\% of its maximum intensity), marking its disappearance.
\end{itemize}



\newpage
\section{Supplementary Tables}
\setcounter{table}{0}
\renewcommand{\thetable}{B \arabic{table}}


\begin{table}[H]
    \floatconts
    {tab:params}
    {\caption{Number of parameters for each sequence model head.}}
    {
        \centering
    \begin{tabular}{ll}
    \toprule
    \textbf{Model} & \textbf{Parameters} \\
    \midrule
    MLP & $1.32 \times 10^{6}$ \\
    Causal CNN & $0.94 \times 10^{6}$ \\
    LSTM & $1.28 \times 10^{6}$ \\
    Mamba & $1.11 \times 10^{6}$ \\
    Transformer & $1.12 \times 10^{6}$ \\
    \bottomrule
        \end{tabular}
    }
\end{table}



\begin{table}[H]
\floatconts
  {tab:sup_table_WT} % Unique label for the table
  {\caption{\textbf{Side-by-side performance comparison of BF and H2B modalities at predicting \Fucci channels on \dataregular.} Both data modalities present similar results: sequence encoders outperform the single-frame method. Moreover, H2B shows only modestly better performance than BF.}}
  {
\centering
\begin{adjustbox}{width=1.\textwidth,center}
  \begin{tabular}{lllllllll}
    \toprule
    & \multicolumn{4}{c|}{Brightfield} & \multicolumn{4}{c}{Histone H2B} \\
    \textbf{Models} & $L_{1, FUCCI_1}$ & $L_{1, FUCCI_2}$ & $R^2$ & \DTW & $L_{1, FUCCI_1}$ & $L_{1, FUCCI_2}$ & $R^2$ & \DTW \\
\midrule
Single Frame & 0.193 ± 0.066 & 0.146 ± 0.045 & 0.459 ± 0.271 & 3.735 ± 0.863 & 0.183 ± 0.104 & 0.130 ± 0.064 & 0.491 ± 0.431 & 2.595 ± 1.201 \\
Causal CNN & 0.157 ± 0.078 & 0.122 ± 0.049 & 0.608 ± 0.294 & 2.468 ± 0.917 & 0.154 ± 0.105 & 0.118 ± 0.061 & 0.586 ± 0.415 & 2.165 ± 1.210 \\
LSTM & 0.108 ± 0.069 & 0.087 ± 0.047 & 0.749 ± 0.266 & 1.527 ± 0.814 & 0.079 ± 0.065 & 0.075 ± 0.044 & 0.833 ± 0.265 & 1.467 ± 1.161 \\
Causal Transformer & 0.121 ± 0.073 & 0.094 ± 0.048 & 0.720 ± 0.279 & 1.728 ± 0.811 & 0.079 ± 0.057 & 0.079 ± 0.042 & 0.839 ± 0.214 & 1.552 ± 0.955 \\
Mamba & 0.112 ± 0.072 & 0.091 ± 0.049 & 0.739 ± 0.282 & 1.444 ± 0.898 & 0.074 ± 0.056 & 0.075 ± 0.040 & 0.853 ± 0.215 & 1.426 ± 0.949 \\
Transformer & \textbf{0.066} \textbf{± 0.038} & \textbf{0.062} \textbf{± 0.037} & \textbf{0.892} \textbf{± 0.111} & \textbf{1.285} \textbf{± 0.553} & \textbf{0.056} \textbf{± 0.039} & \textbf{0.054} \textbf{± 0.033} & \textbf{0.912} \textbf{± 0.116} & \textbf{1.155} \textbf{± 0.612} \\
\bottomrule
  \end{tabular}
\end{adjustbox}}
\end{table}

\begin{table}[H]
\floatconts
  {tab:sup_table_drug}
  {\caption{\textbf{Performance metrics for  both brightfield and histone H2B modalities on \datadrug.}}
  }
  {
\centering
\begin{adjustbox}{width=1.\textwidth,center}
  \begin{tabular}{lllllllll}
    \toprule
    \textbf{Palbociclib} & \multicolumn{4}{c|}{Brightfield} & \multicolumn{4}{c}{Histone H2B} \\
    \textbf{Models} & $L_{1, FUCCI_1}$ & $L_{1, FUCCI_2}$ & $R^2$ & \DTW & $L_{1, FUCCI_1}$ & $L_{1, FUCCI_2}$ & $R^2$ & \DTW \\
\midrule
Single Frame & 0.239 ± 0.082 & 0.182 ± 0.056 & -0.297 ± 1.064 & 5.329 ± 1.147 & 0.183 ± 0.050 & 0.107 ± 0.048 & 0.260 ± 0.466 & 3.285 ± 0.820 \\
Causal CNN & 0.252 ± 0.113 & 0.161 ± 0.059 & -0.353 ± 1.459 & 4.323 ± 1.302 & 0.149 ± 0.042 & 0.125 ± 0.040 & 0.401 ± 0.376 & 3.077 ± 0.840 \\
LSTM & 0.424 ± 0.101 & 0.229 ± 0.045 & -1.663 ± 1.727 & 3.685 ± 1.678 & 0.140 ± 0.051 & 0.115 ± 0.040 & 0.503 ± 0.404 & \textbf{2.750} \textbf{± 0.838} \\
Causal Transformer & 0.326 ± 0.104 & 0.214 ± 0.049 & -0.728 ± 1.558 & 5.159 ± 1.562 & 0.132 ± 0.034 & 0.111 ± 0.037 & 0.628 ± 0.207 & 3.154 ± 0.872 \\
Mamba & 0.485 ± 0.090 & 0.259 ± 0.045 & -2.244 ± 1.949 & 3.563 ± 1.918 & 0.185 ± 0.078 & 0.134 ± 0.043 & 0.255 ± 0.651 & 2.896 ± 0.903 \\
Transformer & \textbf{0.147} \textbf{± 0.056} & \textbf{0.139} \textbf{± 0.048} & \textbf{0.408} \textbf{± 0.478} & \textbf{3.022} \textbf{± 0.985} & \textbf{0.074} \textbf{± 0.029} & \textbf{0.095} \textbf{± 0.031} & \textbf{0.789} \textbf{± 0.131} & 2.896 ± 1.201 \\
\bottomrule
  \end{tabular}
\end{adjustbox}}
\end{table}


\newpage
\section{Supplementary Figures}

\begin{figure}[h]
\floatconts
  {fig:correlation_checkpoints}
  {\caption{\textbf{Distribution of Phase Transition Timings in Ground Truth and Model Predictions Across Different Temporal Encoders for Brightfield Imaging.} The joint distribution of ground-truth (GT) and predicted timings is shown for the \dataregular data. The non-causal transformer outperforms the other temporal encoders.
  }} 
  {\includegraphics[width=1\linewidth]{Figures/squared_correaltion_plot.pdf}} 
\end{figure}



\begin{figure}[h]
\floatconts
  {fig:histograms}
  {\caption{\textbf{Error Distribution of Predictions of \Fucci on Test Set with Brightfield.} \textbf{a.} Distribution of L1 errors across the different models.
\textbf{b.} Error distributions with Q1, median, and Q3 percentiles overlaid.
\textbf{c.} Q1, median, and Q3 error predictions visualized per model.
  }} 
  {\includegraphics[width=1\linewidth]{Figures/histograms-representative-tracks.pdf}} 
\end{figure}


\begin{figure}[h]
\floatconts
  {fig:umap}
  {\caption{\textbf{Learned Latent Space Representations (UMAP).} Each frame of a track is represented as a dot in UMAP space, colored by normalized time. \textbf{a.} Single Frame (no history).
\textbf{b.} Transformer (full sequence).
  }} 
  {\includegraphics[width=1\linewidth]{Figures/umaps/umaps_better_res.pdf}} 
\end{figure}

\begin{figure}[h]
\floatconts
  {fig:visual_landmarks}
  {\caption{\textbf{Predicted $\Delta t_{G1/S}$ and $\Delta t_{S/G2}$ from  BF images for the different models.} 
  }} 
  {\includegraphics[width=1\linewidth]{Figures/visualize_landmark_predictions.pdf}} 
\end{figure}




\begin{figure}[t!]
\floatconts
  {fig:partial_track_fucci}
  {\caption{\textbf{Comparative Performance of Temporal Encoders in Predicting \Fucci1 and \Fucci2 from BF and H2B in partial cell cycle tracks on \dataregular.}
 Error maps showing the prediction error of the different models, assessed on the last frame of segments from the M-M track, spanning indices $\tau_1$ to $\tau_2$.  
 }}
{\includegraphics[width=1\linewidth]{Figures/triang.pdf}}
\end{figure}


\begin{figure}[h]
\floatconts
  {fig:predictions_h2b}
  {\caption{\textbf{Comparative Performance of Temporal Encoders in Predicting Continuous Cell Cycle States from H2B Imaging in Unperturbed RPE Cells.} \textbf{a)} Distribution of L1 errors across the different  models.
\textbf{b)} Example predictions of \Fucci signals from different models on two tracks: one with accurate predictions and one with poor predictions. The ground truth signal is shown in black.
\textbf{c)} Average prediction error and \textbf{d)} ground truth standard deviation are plotted as a function of cell cycle phase.
  }} 
  {\includegraphics[width=1\linewidth]{Figures/Predictions_h2b.pdf}} 
\end{figure}





\begin{figure}[t!]
\floatconts
  {fig:partial_track_h2b}
  {\caption{\textbf{Comparative Performance of Temporal Encoders in Predicting Continuous Cell Cycle States from H2B in partial cell cycle tracks.}
  Error maps showing the prediction error of the different models, assessed on the last frame of segments from the M-M track, spanning indices $\tau_1$ to $\tau_2$. 
 }}
{\includegraphics[width=1\linewidth]{Figures/Partial_track_h2b.pdf}}
\end{figure}


% \begin{figure}[t!]
%     \floatconts
%       {fig:partial_track_fucci_drug}
%       {\caption{\textbf{Comparative Performance of Temporal Encoders in Predicting \Fucci1 and \Fucci2 from BF and H2B in partial cell cycle tracks on \datadrug.}
%      Error maps showing the prediction error of the different models, assessed on the last frame of segments from the M-M track, spanning indices $\tau_1$ to $\tau_2$.  
%      }}
%     {\includegraphics[width=1\linewidth]{Figures/triang_drug.pdf}}
%     \end{figure}
    



