\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{soul}
\usepackage{booktabs}
\jmlrvolume{-- Under Review}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026 submission}
\editors{Under Review for MIDL 2026}

\title[A Multi-Scale Inception-UNet for Branch-Preserving Segmentation]%
{A Multi-Scale Inception-UNet with Structure-Aware \\ Evaluation
for Branch-Preserving Segmentation of Organoids}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Author Sandra H. Andrusca\nametag{$^{1}$}} \orcid{1111-2222-3333-4444} \Email{sandra.andrusca@tum.de}\\
\Name{Author Christopher D. Kießling \nametag{$^{1}$}} \Email{christopher.kiessling@tum.de}\\
\Name{Author Andreas R. Bausch \nametag{$^{1}$}} \Email{abausch@mytum.de}\\
\addr $^{1}$ Heinz Nixdorf Chair in Biophysical Engineering of Living Matter\\
\addr Center for Protein Assemblies (CPA)\\
\addr Technical University of Munich (TUM)\\
\addr 85748 Garching, Germany
}

\begin{document}

\maketitle

\begin{abstract}
Branched organoids exhibit increasingly complex morphologies as they progress from simple spheroid states to highly ramified structures, making topology-preserving segmentation essential for quantitative biological analysis. Capturing thin protrusions and maintaining branch continuity remains challenging for classical UNet-based architectures, particularly in brightfield imaging where fine structures are easily blurred or disconnected.

In this work, we present a multi-scale Inception-UNet designed to capture the heterogeneous spatial scales of branched organoids through parallel convolutional paths with complementary receptive fields. As a model system, we analyze brightfield pancreatic ductal adenocarcinoma (PDAC) organoids, a system known for strong morphological heterogeneity and invasive branching behavior~\cite{randriamanantsoa2022pdacOrganoids}, cultured using high-throughput Patternoid assays~\cite{kurzbach2025patternoid} that enable standardized imaging and robust quantitative analysis.

To assess segmentation quality beyond region overlap, we combine Dice with the structure-aware clDice metric that directly probes branch integrity and topological continuity. Across deterministic seeds and strictly separated organoid positions, the Inception-UNet achieves the highest region-based Dice ($0.868 \pm 0.062$) and clDice ($0.545 \pm 0.123$), and most importantly, the strongest preservation of branch continuity compared to UNet and UNet++. These improvements become increasingly pronounced with growing morphological complexity.

Overall, our results demonstrate that multi-scale feature extraction combined with topology-aware evaluation substantially improves segmentation of branched organoids and provides a robust foundation for downstream morphological and invasion-related analyses.
\end{abstract}



\begin{keywords}
semantic segmentation, organoid imaging, PDAC, multi-scale architectures, UNet, topology preservation, topology-aware loss, clDice, brightfield microscopy, multi-seed evaluation, branch morphology
\end{keywords}


\section{Introduction}

Branched organoids have emerged as an important model system for studying epithelial morphogenesis, invasion, and structural organization. As these systems transition from compact spheroids into highly ramified architectures, branch integrity and topological structure become central descriptors of their biological behavior and downstream quantitative analyses.~\cite{clevers2016organoids, boj2015pdacOrganoids}. Pancreatic ductal adenocarcinoma (PDAC) organoids in particular exhibit pronounced morphological heterogeneity and complex, invasive branching dynamics~\cite{randriamanantsoa2022pdacOrganoids}, making them a demanding but informative benchmark for topology-preserving segmentation.

In this work, we analyze PDAC organoids imaged using high-throughput Patternoid assays~\cite{kurzbach2025patternoid}, which provide standardized 3D culture conditions and longitudinal brightfield acquisition across large organoid populations. The resulting morphologies contain both thin protrusions and broader structural compartments, requiring segmentation methods that can jointly capture fine-scale and coarse-scale features.

Classical UNet architectures~\cite{ronneberger2015unet} remain the dominant approach for biomedical image segmentation, yet their single-scale convolutional blocks often struggle to preserve thin branches in brightfield organoids. UNet++~\cite{zhou2018unetpp} improves feature propagation through nested skip connections, while Inception-style multi-scale representations~\cite{szegedy2015goingdeeper} and residual designs~\cite{he2016resnet} have proven effective for capturing heterogeneous spatial patterns in natural images. However, these ideas have not been systematically adapted to branched organoid morphologies, where preserving topological continuity is essential for downstream quantitative analysis.

To address these limitations, we introduce a multi-scale Inception-UNet that integrates parallel convolutional paths with complementary receptive fields within each encoder stage. This design allows the network to model both thin, elongated branches and larger morphological compartments simultaneously. Because region-based Dice~\cite{long2015fcn} primarily reflects area overlap and is insensitive to many topological defects, we complement it with the structure-aware clDice metric that directly probes centerline continuity and branch preservation. All models are trained within a deterministic multi-seed PyTorch pipeline~\cite{paszke2019pytorch} to ensure reproducibility.

Under identical training and evaluation settings with strictly separated organoid positions, the Inception-UNet achieves the highest Dice and skeleton-based Dice among all compared architectures and shows the clearest improvements in branch continuity. These results highlight the importance of multi-scale feature extraction and topology-aware evaluation for analyzing the complex morphologies of branched organoids.

Our contributions are threefold:
\begin{itemize}
    \item We introduce a multi-scale Inception-UNet architecture tailored to the branched morphology of PDAC organoids cultured in high-throughput Patternoid assays, and evaluate it within a deterministic multi-seed analysis pipeline with strictly separated organoid positions.
    \item We establish a topology-aware evaluation protocol that combines region-based Dice with a structure-aware clDice metric to specifically probe branch continuity and structural integrity beyond conventional overlap measures.
    \item We provide an initial analysis of a skeleton-aware auxiliary loss that augments standard region-based supervision with topology-focused signals, illustrating how explicit branch-aware objectives can further enhance structural fidelity.
\end{itemize}


\section{Materials and Methods}\label{sec:methods}
\subsection{Dataset}

We analyze brightfield microscopy data of pancreatic ductal adenocarcinoma (PDAC) organoids, characterized by heterogeneous and invasive growth patterns~\cite{randriamanantsoa2022pdacOrganoids}. The dataset is generated using the Patternoid high-throughput platform~\cite{kurzbach2025patternoid}, enabling standardized culture and imaging.

Organoids are recorded as hourly time-lapse sequences, each consisting of a brightfield $z$-stack ($\sim$100 slices). For segmentation, stacks are converted into 2D standard-deviation (STD) projections computed over in-focus slices, enhancing local contrast and improving visibility of thin branches.

The dataset spans three PDAC morphological subtypes~\cite{boj2015pdacOrganoids, clevers2016organoids}, ranging from compact to highly branched structures. Since we address binary segmentation, all subtypes are pooled to increase morphological diversity.


\begin{figure}[htbp]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:dataset_overview}
  {\caption{Representative STD projections of PDAC organoids across four time windows, illustrating the transition from compact to highly branched morphologies. Bottom: example with ground truth and overlay highlighting thin branches. Scale bar: 50\,\textmu m.}}
  {\includegraphics[width=0.8\linewidth]{Figures_Methods/dataset.png}}
\end{figure}

To avoid temporal and positional leakage, we enforce strict separation of organoid positions: all time points from a given organoid are assigned to exactly one of the training, validation, or test sets. Evaluation is therefore performed on entirely unseen organoids rather than additional time points of already observed ones.

Manual pixel-wise annotations were generated by trained experts on the projected images. 
The final dataset comprises 361 training images, 101 validation images, and 81 test images, 
corresponding to distinct organoid instances under the position-based split described above.


\subsection{Preprocessing}

Each $z$-stack is rigidly registered to correct for drift, followed by selection of in-focus slices and computation of a 2D STD projection. This projection enhances structural details, particularly thin protrusions.

To preserve spatial resolution, images are not resized. Instead, each frame is padded to $768 \times 768$ pixels and processed using overlapping $256 \times 256$ tiles. During training, tiles are sampled using a foreground-aware strategy, while inference uses weighted stitching to reconstruct full-resolution predictions without boundary artifacts.


\subsection{Inception-UNet Architecture}

The proposed Inception-UNet extends the standard UNet by replacing encoder blocks with lightweight multi-scale Inception modules (Figure~\ref{fig:architecture}). 
These modules combine parallel convolutions with different receptive fields to capture both local and global context.

Each encoder block consists of two branches: a $3\times3$ convolution for local features and a larger receptive-field branch approximating $7\times7$ context. 
The outputs are concatenated and fused via a $1\times1$ convolution, followed by normalization and non-linearity.

To enhance feature representation, we incorporate channel-wise recalibration (SE blocks) and residual connections, improving stability and preserving fine spatial details.

The decoder follows the standard UNet design with upsampling and skip connections. 
All models are matched in depth and parameter scale to ensure a fair comparison.


\begin{figure}[htbp]
    \centering
    \includegraphics[width=1\linewidth]{Figures_Methods/Inception_Network_Architecture_Schematic_new.png}
    \caption{
    Overview of the proposed Inception-UNet architecture.
    Each encoder stage replaces standard convolutional blocks with a two-branch multi-scale module consisting of a local ($3\times3$) convolution and a larger receptive-field branch approximating a ($7\times7$) context.
    The branch outputs are combined and projected via a ($1\times1$) convolution, followed by batch normalization and ReLU activation. Channel-wise feature responses are recalibrated using a squeeze-and-excitation (SE) block, and residual connections are employed to stabilize training and preserve fine spatial details.
    The decoder follows the standard UNet design with upsampling, skip connections, and ($3\times3$) refinement convolutions.}
    \label{fig:architecture}
\end{figure}


\subsection{Structure-Aware Evaluation}\label{sec:structure_evaluation}

Region-based metrics such as Dice primarily quantify area overlap but are insensitive to topological errors, particularly in thin and elongated structures. 
For branched PDAC organoids, such errors can affect downstream analyses of branch continuity and morphology.

To capture structural fidelity, we complement Dice with the clDice metric, which evaluates overlap of soft skeleton representations and is sensitive to discontinuities and missing branches.

In addition, we consider a connectivity-based penalty that quantifies local continuity along predicted skeletons. 
This term is used to derive an aggregated topology score:
\begin{equation}
\mathrm{TopoScore} = \mathrm{clDice} - \lambda \cdot \mathrm{GraphPenalty}.
\end{equation}

This score provides a complementary perspective by combining overlap-based and connectivity-based aspects of structural quality.

Together, these metrics enable a more comprehensive evaluation, revealing structural differences that are not captured by region-based Dice alone.


\subsection{Exploratory Topology-Aware Loss}\label{sec:skel_loss}

We explore whether explicit topology-aware supervision can improve structural fidelity during training.

The baseline loss combines Dice and binary cross-entropy:
\begin{equation}
\mathcal{L}_{\mathrm{reg}} =
\alpha \, \mathcal{L}_{\mathrm{Dice}} +
\beta \, \mathcal{L}_{\mathrm{BCE}}.
\end{equation}

To incorporate structural information, we add auxiliary terms operating on soft skeleton representations. 
A soft skeleton Dice term encourages alignment of predicted and reference centerlines:
\begin{equation}
\mathcal{L}_{\mathrm{skel}} =
1 - \mathrm{Dice}(S_{\mathrm{pred}}, S_{\mathrm{gt}}),
\end{equation}

and a continuity term penalizes fragmented predictions:
\begin{equation}
\mathcal{L}_{\mathrm{graph}} =
\frac{1}{|\Omega|} \sum_{p \in \Omega} \max(0, \tau - N(p)).
\end{equation}

The final loss is defined as
\begin{equation}
\mathcal{L} =
\alpha \, \mathcal{L}_{\mathrm{Dice}} +
\beta \, \mathcal{L}_{\mathrm{BCE}} +
\gamma \, \mathcal{L}_{\mathrm{skel}} +
\delta \, \mathcal{L}_{\mathrm{graph}}.
\end{equation}

These terms are evaluated as exploratory extensions. While they can improve structure-aware metrics, 
they do not consistently outperform simpler formulations and may lead to slightly inflated segmentations. 
We therefore retain the simpler loss for the main experiments.


\subsection{Training Procedure}\label{sec:training_procedure}

All models are trained under a deterministic multi-seed protocol using seeds $\{0,1,2,3,4\}$. 
Random number generators in Python, NumPy, and PyTorch are initialized per seed to control stochastic effects, 
while allowing non-deterministic backend optimizations for computational efficiency.

Models are optimized using Adam with fixed hyperparameters shared across all architectures. 
Hyperparameters are selected once based on a reference UNet model and kept constant to ensure a fair comparison.

Training is performed with early stopping based on validation performance. 
For ablation studies, models are trained for up to 100 epochs (patience 15), 
while final experiments use up to 150 epochs (patience 30) to ensure convergence. 

Model selection is based on the best validation score.

\subsection{Baseline Models}

We compare the proposed Inception-UNet with two widely used architectures for biomedical image segmentation: UNet~\cite{ronneberger2015unet} and UNet++~\cite{zhou2018unetpp}. 

To ensure a fair comparison, all models are matched in encoder--decoder depth, channel width, and overall parameter scale. 

\paragraph{UNet.}
UNet follows a standard encoder--decoder architecture with symmetric skip connections for multi-scale feature fusion.

\paragraph{UNet++.}
UNet++ extends UNet with nested skip connections that refine encoder features before fusion in the decoder.

\paragraph{Training Conditions.}
All models are trained under identical settings, including optimizer, learning rate, batch size, data splits, and deterministic multi-seed protocol. 
Unless stated otherwise, models are optimized using a Dice + BCE loss, optionally augmented with topology-aware terms (e.g., clDice).

\section{Results}\label{sec:results}
\subsection{Quantitative Evaluation}

We evaluate UNet, UNet++, and the proposed Inception-UNet on the held-out test set using region-based Dice and the structure-aware clDice metric (Section~\ref{sec:structure_evaluation}).
All values are reported as mean $\pm$ standard deviation across deterministic seeds.

Statistical significance is assessed using paired Wilcoxon signed-rank tests on per-image values averaged across seeds (mean-over-seeds).

\begin{figure}[htbp]
    \centering
    \includegraphics[width=1\linewidth]{Figures_Results/Result_examples.png}
\caption{
Qualitative comparison of segmentation performance on PDAC organoids. 
From left to right: STD projection, ground truth, UNet, UNet++, and Inception-UNet. 
Top row: example of a small organoid. UNet under-segments the structure and fails to capture its full extent, while UNet++ tends to over-segment and produces inflated masks. The Inception-UNet more accurately recovers the overall shape and boundary.
Bottom row: highly branched organoid. Both UNet and UNet++ produce fragmented predictions and fail to preserve thin branches, particularly along the lower protrusion. In contrast, the Inception-UNet yields more continuous segmentations and better maintains branch connectivity.
Scale bar: 100\,\textmu m.}
    \label{fig:quantitative_results}
\end{figure}

The Inception-UNet achieves the highest Dice scores across seeds. 
Wilcoxon testing confirms significant improvements over both UNet and UNet++ ($p < 0.001$, $r > 0.8$).

For structure-aware evaluation, skeleton-based Dice reveals clearer separations between models. 
The Inception-UNet significantly outperforms both baselines ($p < 0.01$), indicating improved preservation of thin and elongated structures.

Overall, the proposed model improves both region overlap and structural fidelity compared to standard architectures.

\begin{table}[t]
\centering
\caption{Segmentation performance (mean $\pm$ std over seeds).}
\label{tab:results}
\begin{tabular}{lccc}
\toprule
Model & Dice & clDice & TopoScore \\
\midrule
UNet & 0.862 $\pm$ 0.076 & 0.438 $\pm$ 0.168 & 0.309 $\pm$ 0.214 \\
UNet++ & 0.828 $\pm$ 0.094 & 0.442 $\pm$ 0.163 & 0.043 $\pm$ 0.208 \\
Inception-UNet & \textbf{0.868 $\pm$ 0.062} & \textbf{0.545 $\pm$ 0.123} & \textbf{0.311 $\pm$ 0.193} \\
\bottomrule
\end{tabular}
\end{table}

Table~\ref{tab:results} summarizes the overall segmentation performance across models, showing that the Inception-UNet achieves the highest scores across all metrics, with the largest improvements observed in structure-aware measures.

\subsection{Structural Fidelity Beyond Region Overlap}

Region-based Dice captures overlap but fails to reflect topological errors such as broken or disconnected branches. 
As shown in Figure~\ref{fig:quantitative_results}, UNet and UNet++ frequently produce discontinuities in thin protrusions, 
whereas the Inception-UNet maintains more consistent centerlines and connectivity.

These differences are captured by skeleton-based Dice, which reveals structural discrepancies that remain largely invisible to region-based metrics.

\subsection{Stability Across Seeds}

All models show low seed-to-seed variation, indicating stable training. 
The relative ranking remains consistent, with Inception-UNet achieving the best performance across metrics.

\subsection{Ablation Studies}

We perform controlled ablations to assess the impact of preprocessing, loss design, and architecture on segmentation performance.

\subsubsection{Resolution Ablation}

We compare a conventional downsampling approach ($128 \times 128$) with a resolution-preserving pad-and-tile strategy ($768 \times 768$ with $256 \times 256$ patches).

The resolution-preserving approach consistently improves performance, particularly for structure-aware metrics. 
While region-based Dice shows only moderate differences, topology-aware scores benefit substantially from maintaining native spatial resolution.

These results highlight that aggressive downsampling degrades the representation of thin and elongated structures, leading to reduced topological fidelity.

\subsubsection{Loss Ablation}
\label{sec:loss_ablation}
To isolate the effect of topology-aware supervision, we perform a loss ablation study using a fixed UNet architecture under identical training conditions.

We compare a region-based baseline (BCE + Dice) with topology-aware extensions, including clDice and a more complex custom formulation combining skeleton and graph constraints.

\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.6\linewidth]{Figures_Results/loss_comp.png}
    \caption{Loss ablation on a fixed UNet architecture using the structure-aware clDice metric. Boxplots show per-image clDice scores on the test set, averaged across five deterministic seeds (mean-over-seeds). Statistical significance is assessed using paired Wilcoxon signed-rank tests. The addition of clDice significantly improves structural performance over the region-based baseline ($p < 0.001$), while the more complex custom loss does not yield further gains.}
    \label{fig:loss_comp}
\end{figure}

Region-based Dice remains largely unchanged across all configurations, whereas structure-aware metrics show clear differences. 
The addition of clDice consistently improves structural fidelity, while more complex loss formulations do not provide further benefits and may introduce slightly thicker segmentations.

Based on these results, we retain the simpler \textbf{BCE + Dice + clDice} formulation for subsequent experiments.

\subsubsection{Architecture Ablation}

To isolate the effect of network architecture, we compare UNet, UNet++, and the proposed Inception-UNet under identical training conditions, including preprocessing, data splits, and loss formulation.

\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.6\linewidth]{Figures_Results/arch_comp.png}
    \caption{Architecture comparison using the structure-aware clDice metric.
Boxplots show per-image clDice scores on the test set, averaged across five deterministic seeds (mean-over-seeds). Statistical significance is assessed using paired Wilcoxon signed-rank tests. The proposed Inception-UNet significantly outperforms both UNet and UNet++ ($p < 0.001$), while no significant difference is observed between UNet and UNet++ (n.s.).}
    \label{fig:arch_comp}
\end{figure}

All models are matched in encoder--decoder depth and parameter scale to ensure a controlled comparison.

Across architectures, region-based Dice remains comparable, whereas structure-aware metrics reveal clear differences. 
The Inception-UNet consistently achieves higher clDice scores, indicating improved preservation of thin branches and connectivity.

Importantly, these gains are not caused by inflated segmentations, but reflect more coherent centerline reconstruction, as confirmed by qualitative inspection.

Based on these results, we select the Inception-UNet for subsequent experiments.

\subsection{Final Model Comparison and Statistical Analysis}

We compare UNet, UNet++, and the proposed Inception-UNet using paired Wilcoxon signed-rank tests on the test set. 
Per-image metrics are first averaged across deterministic training seeds (mean-over-seeds), yielding one value per image and model.

The Inception-UNet achieves significantly higher region-based Dice scores than both UNet and UNet++ ($p < 10^{-10}$, $r > 0.85$), 
indicating consistent improvements across the dataset.

For structure-aware metrics, Inception-UNet also significantly outperforms both baselines ($p < 0.01$), 
confirming improved preservation of thin branches and connectivity. 
While absolute differences are smaller, effect sizes remain moderate to large, 
indicating systematic improvements rather than isolated cases.

Importantly, these gains are not accompanied by inflated segmentations. 
Qualitative inspection shows that Inception-UNet preserves coherent centerlines 
while maintaining compact foreground regions.


\subsection{Exploratory Topology-Aware Loss}

We additionally evaluate a skeleton-aware loss that combines region-based supervision with explicit skeleton and connectivity constraints. 
While this formulation improves clDice in some cases, it does not consistently outperform the simpler clDice-based loss 
and can lead to slightly inflated segmentations.

We therefore retain the BCE + Dice + clDice formulation for the main experiments.


\section{Discussion}
\label{sec:discussion}

We demonstrate that topology-preserving segmentation of highly branched PDAC organoids from brightfield microscopy is feasible despite low contrast and limited annotations. 
While region-based Dice remains similar across models, structure-aware metrics reveal substantial differences in the preservation of thin branches and connectivity.

The proposed Inception-UNet achieves the best overall performance, particularly in later, highly branched stages. 
This suggests that multi-scale feature extraction is beneficial for capturing heterogeneous spatial structures. 
In contrast, UNet++ does not provide consistent advantages in this setting, indicating that nested skip refinements are less effective under limited structural contrast.

Our results further highlight the importance of topology-aware evaluation. 
The clDice metric reveals structural differences that are not captured by region overlap alone, emphasizing the need for complementary evaluation strategies.

Overall, we establish a reproducible benchmark for topology-aware segmentation of brightfield organoids and show that multi-scale architectures improve structural fidelity without sacrificing compactness.

\section{Outlook}
\label{sec:outlook}

Future work will explore more advanced topology-aware losses and evaluation metrics 
to better capture subtle structural defects in complex organoid morphologies.









% This is where the content of your paper goes.  Some random
% notes\footnote{Random footnote are discouraged}:
% \begin{itemize}
%\item You should use \LaTeX \cite{Lamport:Book:1989}.
%\item JMLR/PMLR uses natbib for references. For simplicity, here, \verb|
%\cite|  defaults to
%  parenthetical citations, i.e. \verb|\citep|. You can of course also
%  use \verb|\citet| for textual citations.
%\item Eprints such as arXiv papers can of course be cited %
%\cite{Hinton:arXiv:2015:Distilling}. We recomend using a \verb|@misc| %bibtex entry for these as shown in the sample bibliography.
%\item You should follow the guidelines provided by the conference.
%\item Read through the JMLR template documentation for specific \LaTeX
%  usage questions.
%\item Note that the JMLR template provides many handy functionalities
%such as \verb|\figureref| to refer to a figure,
%e.g. \figureref{fig:example},  \verb|\tableref| to refer to a table,
%e.g. \tableref{tab:example} and \verb|\equationref| to refer to an %equation,
%e.g. \equationref{eq:example}.
%\end{itemize}


%\begin{algorithm2e}
%   \caption{Computing Net Activation}
%   \label{alg:net}
%   \KwIn{$x_1, \ldots, x_n, w_1, \ldots, w_n$}
%   \KwOut{$y$, the net activation}
%   $y\leftarrow 0$\;
%   \For{$i\leftarrow 1$ \KwTo $n$}{
%   $y \leftarrow y + w_i*x_i$\;
%}
%\end{algorithm2e}

\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{We thank Sophie Kurzbach for providing the experimental data. We acknowledge the support by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) through the project BA 2029/15-1 and by the European Research Council under the European Union's Horizon 2020 research and innovation programme (grant agreement no. 810104-PoInt).}

\bibliography{midl-samplebibliography}


\appendix

\section{Topology-Aware Loss Formulations}
\label{sec:appendix_losses}

In addition to standard region-based supervision, we explored topology-aware 
loss formulations to encourage structural consistency in thin and elongated 
organoid branches. This section summarizes the loss components used in the 
loss ablation study (Section~\ref{sec:loss_ablation}).

\subsection{Region-Based Loss}

The baseline loss combines binary cross-entropy (BCE) and soft Dice loss:
\begin{equation}
\mathcal{L}_{\mathrm{reg}} =
\alpha \, \mathcal{L}_{\mathrm{Dice}} +
\beta \, \mathcal{L}_{\mathrm{BCE}}.
\end{equation}

\subsection{Soft Skeleton Overlap (clDice)}

To incorporate topology-aware supervision without requiring hard skeletonization, 
we employ the clDice formulation:
\begin{equation}
\mathcal{L}_{\mathrm{clDice}} =
1 - \frac{2 \, T_{\mathrm{prec}} \cdot T_{\mathrm{sens}}}
         {T_{\mathrm{prec}} + T_{\mathrm{sens}} + \varepsilon},
\end{equation}
with
\begin{align}
T_{\mathrm{prec}} &=
\frac{\lvert S_{\mathrm{pred}} \cap M_{\mathrm{gt}} \rvert}
     {\lvert S_{\mathrm{pred}} \rvert + \varepsilon}, \\
T_{\mathrm{sens}} &=
\frac{\lvert S_{\mathrm{gt}} \cap M_{\mathrm{pred}} \rvert}
     {\lvert S_{\mathrm{gt}} \rvert + \varepsilon}.
\end{align}

\subsection{Custom Topology Loss (Skeleton + Graph)}

As an alternative, we define a custom topology-aware loss that combines explicit 
skeleton supervision with a connectivity constraint:
\begin{equation}
\mathcal{L}_{\mathrm{topo}} =
\mathcal{L}_{\mathrm{skel}} + \mathcal{L}_{\mathrm{graph}}.
\end{equation}

The skeleton supervision term is given by
\begin{equation}
\mathcal{L}_{\mathrm{skel}} =
\mathrm{BCE}\left(S_{\mathrm{pred}}^{\mathrm{hard}},
                  S_{\mathrm{gt}}^{\mathrm{hard}}\right),
\end{equation}
where $S^{\mathrm{hard}}$ denotes binary skeletons obtained via morphological thinning.

The graph-based continuity term penalizes insufficient local connectivity:
\begin{equation}
\mathcal{L}_{\mathrm{graph}} =
\frac{1}{|\Omega|}
\sum_{p \in \Omega}
\max\left(0, \tau - N(p)\right),
\end{equation}
where $N(p)$ is the number of active neighbors of pixel $p$, and $\tau$ is a minimum 
connectivity threshold.

\subsection{Final Training Objective}

The overall loss is defined as
\begin{equation}
\mathcal{L} =
\alpha \, \mathcal{L}_{\mathrm{Dice}} +
\beta \, \mathcal{L}_{\mathrm{BCE}} +
\gamma \, \mathcal{L}_{\mathrm{topo}},
\end{equation}
where $\mathcal{L}_{\mathrm{topo}}$ corresponds either to $\mathcal{L}_{\mathrm{clDice}}$ 
or the custom topology loss defined above.

All topology-aware losses are treated as exploratory extensions and are evaluated 
in the ablation study in Section~\ref{sec:loss_ablation}.


\end{document}
