\documentclass{midl} % Include author names
% \documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{array}
\usepackage{url}
\usepackage{multicol}
% \setlength{\textfloatsep}{0 pt minus 10.0pt}
\usepackage{titlesec}
\titlespacing*{\section}
{0pt}{12.0pt}{4.0pt}
\titlespacing*{\subsection}
{0pt}{12.0pt}{4.0pt}
\newenvironment{conditions}
  {\par\vspace{\abovedisplayskip}\noindent\begin{tabular}{>{$}l<{$} @{${}={}$} l}}
  {\end{tabular}\par\vspace{\belowdisplayskip}}
\jmlrvolume{-- 319}
\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
\editors{Accepted for publication at MIDL 2024}

\title[Learned morphological features guide cell typing]{Learned morphological features guide cell type assignment of deconvolved spatial transcriptomics}


% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Eduard Chelebian\nametag{$^{1}$}} \Email{eduard.chelebian@it.uu.se}
\AND
\Name{Christophe Avenel\nametag{$^{1}$}} \Email{christophe.avenel@it.uu.se}
\AND
\Name{Julio Leon\nametag{$^{2}$}} \Email{julio.leon@ucsf.edu}
\AND
\Name{Chung-Chau Hon\nametag{$^{3}$}} \Email{chungchau.hon@riken.jp}
\AND
\Name{Carolina Wählby\nametag{$^{1}$}} \Email{carolina.wahlby@it.uu.se}
\AND
\addr $^{1}$ Department of Information Technology and SciLifeLab, Uppsala University, Uppsala, Sweden\\
\addr $^{2}$ Department of Neurology and Weill Institute for Neurosciences, University of California, San Francisco, San Francisco, California\\
\addr $^{3}$ Laboratory for Genome Information Analysis, RIKEN IMS, Yokohama, Japan
}

\begin{document}

\maketitle

\begin{abstract}
Spatial transcriptomics enables the study of the relationship between gene expression and tissue organization. Despite many recent advancements, existing sequencing-based methods have a spatial resolution that limits identification of individual cells. To address this, several cell type deconvolution methods have been proposed to integrate spatial gene expression with single-cell and single-nucleus RNA sequencing, producing per-spot cell typing. However, these methods often overlook the contribution of morphology, which means cell identities are randomly assigned to the nuclei within a spot. In this paper, we introduce MHAST, a morphology-guided hierarchical permutation-based framework which efficiently reassigns cell types in spatial transcriptomics. We validate our method on simulated data, synthetic data, and a use case on the broadly used Tangram cell type deconvolution method with Visium data. We show that deconvolution-based cell typing using morphological tissue features from self-supervised deep learning leads to a more accurate annotation of the cells.
\end{abstract}

\begin{keywords}
self-supervised learning, spatial transcriptomics, cell type deconvolution
\end{keywords}

\section{Introduction}

Spatial transcriptomics has advanced our ability to understand the interplay between gene expression and tissue morphology, i.e., the spatial organization of tissue \cite{bressan2023dawn}. However, these methods, broadly classified into imaging-based and sequencing-based, are not without their limitations. Imaging-based methods reach sub-cellular resolution, but have limited gene coverage, while sequencing-based approaches, like Visium HD from 10X Genomics, Stereo-seq \cite{xia2022single} and Seq-scope \cite{cho2021microscopic}, compromise spatial resolution, and each sequenced tissue region may contain multiple cell types. To address this, several studies have proposed integrating spatial transcriptomics with single cell and single nucleus RNA sequencing (sc/snRNA-seq) by developing cell type deconvolution methods \cite{chen2022comprehensive, li2022benchmarking, li2023comprehensive}. These methods can be categorized into probabilistic-based, non-negative matrix factorization-based, graph-based, deep learning-based and optimal transport-based \cite{li2023comprehensive}. 

Benchmark studies show that Tangram \cite{biancalani2021deep}, a deep learning-based method, and Cell2location \cite{kleshchevnikov2022cell2location}, a probabilistic-based method, consistently outperformed others on various metrics. Interestingly, despite both methods utilizing nuclei segmentation from hematoxylin and eosin (H\&E) for estimating cell density, they overlook morphology as a guiding factor for cell typing. Instead, Tangram assigns a cell type to each detected nucleus randomly. Recent efforts, such as SpaDecon \cite{coleman2023spadecon}, tried to address this limitation by incorporating histology intensity values per region. However, this approach falls short of leveraging the rich information available in morphology. Consequently, while deconvolution accuracy at the spot level may be achieved, arguably the random attribution of cell types to individual nuclei does not allow a real increase in resolution.

To address this issue, we conceptualize the assignment as a problem of permutation. We hypothesize that we know the number of cell types within each spot from the deconvolution method, but that we have a permuted version of the actual composition. Using nuclei morphology as a guide, we conducted efficient hierarchical permutations under the assumption that similar cell types exhibit comparable nuclear morphology in H\&E staining. To capture morphology we tried both classical morphology features and self-supervised deep representations. Due to the intrinsic difficulty of evaluating the method without a ground-truth, we conducted experiments on simulated and synthetic data as well as on a real use-case. 

The main contributions of our work can be summarized as follows: 

\begin{enumerate}
    \item We developed a morphology-based cell re-assignment step for single-cell to spatial transcriptomics deconvolution.
    \item We propose a hierarchical permutation method that efficiently improves the arrangement of cell types in a tissue.
    \item We used self-supervised deep learning features as powerful representations of cells.
\end{enumerate}

MHAST (Morphology-guided Hierarchical reAssignment of cell types in Spatial Transcriptomics) can be integrated into any deconvolution method to achieve its full potential by leveraging the tissue morphology. The code for implementations and demos is available at \url{https://github.com/eduardchelebian/mhast}.

\section{Methods}
\subsection{Mathematical formulation}
The proposed approach seeks to efficiently determine the optimal arrangement of cell types in spatial transcriptomics experiments by addressing the computational challenge associated with exhaustive permutation calculations (with factorial complexity $O(n!)$). Instead of directly computing every possible permutation, the optimization is conducted in two hierarchical steps: first locally at the spot-level, and then globally. This hierarchical strategy reduces the permutation space, mitigating the complexity of the problem. Figure \ref{fig:intuition} shows the intuition behind the method.

Sequencing-based spatial transcriptomics experiments are organized in spots that capture transcriptome-wide gene expression. Given \(N\) cells with \(L\) cell type labels and \(K\) morphological features belonging to \(M\) spots of different sizes, let  \(A \in \{0,1\}^{N \times M}\) be the one-hot encoded matrix indicating the belonging of each cell to a spot, let \(B \in \mathbb{R}^{N \times K}\) be the matrix indicating the cell features and let \(X \in \{0,...,L\}^{N \times M}\) be the matrix indicating the cell type of each cell. Additionally, let \(P_m \in \{0,1\}^{N \times N}\) be the permutation matrix for rearranging cell type labels within a spot \(m\).

First, we apply the optimization at spot-level locally. We define \(\mathcal{P}_m\) as the space of all acceptable permutation matrices \(P_m\) for spot $m \in M$ based on constraints (1) restrict permutations to each spot and (2) ensure that two cells of the same type are not permuted:

\begin{equation}
\forall m, \quad P_m \cdot A = A
\end{equation}
\begin{equation}
\forall m, \quad P_m \cdot X \neq X
\end{equation}

For each spot \(m\), exhaustively find $P_m \in \mathcal{P}_m$ that maximizes the within-spot Calinski-Harabasz (CH) score \cite{calinski1974dendrite} for spots that have more than one cell of each type. Note that the choice of the CH score is deliberate, as it effectively balances between-cluster and within-cluster distributions, contributing to the method's efficacy.
\begin{equation}
\max_{P_m \in \mathcal{P}_m} CH(P_m \cdot X_m, B_m) \quad \textrm{if} \quad \exists \: l \in L_m : |X_{m, l}| > 1
\end{equation}

where the CH score for $L$ number of cell types on the dataset $B= \left [ b_1, b_2, ..., b_N \right ]$ is:
\begin{equation}
\label{eq:chscore}
CH(X,B) = \frac{\sum_{l=1}^{L}|X_l|\left \| c_l - c \right \|^2}{L-1} / \frac{\sum_{l=1}^{L}\sum_{i=1}^{|X_l|}\left \| b_i - c_l \right \|^2}{N-L}
\end{equation}

where $|X_l|$ is the number of cells with the $l$th label, $c_l$ is the centroid of the $l$th label and $c$ is the global centroid.

This step results in the arrangement of cells within each spot. However, it does not optimize arrangements within spots where the number of cells for each cell type is identical $ |X_{m,i}| = |X_{m,j}| \quad \textrm{for} \quad i \neq j $. In the case where $|X_{m,i}| = |X_{m,j}| = 1$, it is not possible to calculate the CH score, while if $|X_{m,i}| = |X_{m,j}| > 1$ there are multiple $P_m$ that yield the same highest CH score. 

With these $m$ spots we define a new space of acceptable permutations $\mathcal{P'}_m \subset \mathcal{P}_m$. Having significantly reduced the permutation space, we can now conduct a global optimization. For each spot \(m\), exhaustively find $P_m \in \mathcal{P'}_m$ that maximizes the across-spot CH score:
\begin{equation}
\max_{P_m \in \mathcal{P'}_m} CH(P_m \cdot X, B) 
\end{equation}

which will produce the arrangement for the rest of the instances.

\begin{figure}[!htbp]
    \centering
    \includegraphics[width=1\linewidth]{intuition-min.png}
    \caption{UMAP \cite{becht2019dimensionality} dimensionality reduced feature space of (a) randomly permuted spots, (b) local optimization per spot, (c) result of local optimization, (d) global optimization and (e) final result. Values in the bottom right correspond to Calinski-Harabasz (CH) score. Colors represent different cell identities.}
    \label{fig:intuition}
\end{figure}

\subsection{Cell type deconvolution}
In order to get a single cell type per detected nucleus in sequencing-based spatial transcriptomics experiments, we need to first segment the cells and then run the deconvolution methods for inferring the cell type composition.

\textbf{Nuclei segmentation.} For segmenting the nuclei we used the built-in nuclei detection method in QuPath \cite{bankhead2017qupath} on the H\&E image. This method has a good balance between speed and accuracy, enabling the efficient annotation of nuclei within a specified region of interest. Additionally, it provides measurements associated with the detected nuclei, serving as  features for subsequent analyses, as presented in Section \ref{subsec:features}. Once the nuclei are detected, they can be used to measure the relative cell abundance in each spatial transcriptomic spot, which serves as a surrogate of cell density.

\textbf{Composition inference.} Following nuclei segmentation, the determination of cell type composition involves the application of deconvolution methods. As established before \cite{chen2022comprehensive, li2022benchmarking, li2023comprehensive}, Tangram \cite{biancalani2021deep} consistently emerges as one of the top-performing methods. Given its proven efficacy, Tangram was selected for validation in our experimental framework. Tangram is a deep learning-based approach which aligns single-cell gene expression data with spatial gene expression data by mapping them onto the same anatomical region, using shared genes for the mapping.

\subsection{Feature extraction}
\label{subsec:features}
From the detected nuclei, we extract morphological descriptors which will guide the permutations under the hypothesis that similar cell types share morphological features.

\textbf{Classical features.} Using QuPath \cite{bankhead2017qupath}, the same software utilized for detecting the nuclei, we extract per-nucleus descriptors and per-cell descriptors. Cells are defined by expanding each detected nucleus to a radius of $5\,\mu\mathrm{m}$ until it encounters another nucleus expansion. This is not the most accurate estimation but serves as a way of including the context around the nucleus. The classical features extracted from the cells (C: cells) and nuclei (C: nuclei) include: area, perimeter, circularity, maximum and minimum diameter, eccentricity and H\&E-derived intensity features.

\textbf{Self-supervised learning features.} Our dataset was too small to expect the model to learn relevant features training it from scratch. We therefore started from a publicly available ResNet18 model trained by self-supervision with SimCLR \cite{chen2020simple}  on 57 histopathological datasets \cite{ciga2022self}. We fine-tuned the model using the detected nuclei as centers, extracted one image patch per cell, and trained by self-supervision with SimCLR in the same way as in the original model. Experiments included patch sizes of $32\times32$ (DL: 32), $64\times64$ (DL: 64) and $128\times128$ (DL: 128) to test the contribution of different contexts. For example patches, see Appendix \ref{appendix:patch_size}. Finally, we used the fine-tuned model's last fully connected layer before prediction to define features for each cell patch.

\section{Experiments on simulated data}
We first evaluated the method using simulated Visium data, enabling generation of ground-truth for methodological validation under controlled conditions. 

\subsection{Data generation}

We are essentially simulating the relative abundance of cell types per spot, which is the output from deconvolution methods. This output is then randomly assigned to the detected nuclei and denoted as $X_{perm}$, as shown in Figure \ref{fig:summary}. Subsequently, guided by the morphological features in the feature matrix $B$, our objective is to optimally assign cell types to each morphology, resulting in the actual assignment denoted as $X$. The details of the data simulation can be found in Appendix \ref{appendix:simulation}.

\begin{figure}[!htbp]
    \centering
    \includegraphics[width=1\linewidth]{simulated-min.png}
    \caption{Data simulation workflow. From the output from cell type deconvolution methods, we simulate the random assignment of cell types ($X_{perm}$). Guided by the simulated morphological features $B$, we correct the cell assignment to match the cell identities with their morphology ($X$). More details in Appendix \ref{appendix:simulation}.}
    \label{fig:summary}
\end{figure}

\subsection{Evaluation and results}
The evaluation of the simulated data aims to determine the level of feature descriptiveness required to recover the true cell type arrangement $X$ from its permuted version $X_{perm}$. 

Appendix \ref{appendix:simulation_results} shows how the rearrangement accuracy of $X_{perm}$ with respect to the original $X$ changes when increasing the feature overlap. The baseline is established by calculating the accuracy of the randomly permuted $X_{perm}$. With non-overlapping features for each cell type, we have a perfect rearrangement accuracy. Notably, as feature overlap increases, the rearrangement still remains valuable, outperforming random allocation.

\section{Experiments on synthetic data}
One approach to incorporate actual H\&E features into the evaluation, while still having a ground-truth, is to generate a synthetic Visium dataset from other spatial transcriptomics methods. Xenium from 10X Genomics is particularly suitable for this purpose. It provides cell typing information from high-resolution imaging-based spatial transcriptomics, typically accompanied by DAPI imaging, but also includes H\&E staining on the same section.

\subsection{Dataset}
We use the Xenium In Situ Breast Dataset\footnote{\url{https://www.10xgenomics.com/products/xenium-in-situ/preview-dataset-human-breast}}, by 10X Genomics \cite{janesick2022high}. We sample a region with variable density of cells and cell types and generate synthetic Visium spots including the cell type locations. The details can be found in Appendix \ref{appendix:xenium2visium}.

\subsection{Evaluation and results}
The assessment of the synthetic data focuses on identifying the descriptors that yield the best reconstruction score. To this end, we implemented the method with the different feature extractors and calculated the macro-averaged F1-score to capture the overall contribution of all cell types. This evaluation is compared with random permutations, which emulate the result from deconvolution methods.

The results in Figure \ref{fig:xenium} reveal that applying the method consistently enhanced the results, outperforming the majority of random outcomes. In fact, self-supervised features on $64\times64$ patches surpass every random result and successfully retrieve more of the original cell types. It is important to note that this task is more challenging than its real-world counterpart, given the higher density and diversity of cell types in comparison to what one would encounter in a typical Visium experiment.

\begin{figure}[!htbp]
    \centering
    \includegraphics[width=1\linewidth]{xenium-min.png}
    \caption{(a) Region of Xenium cell typing registered on H\&E. (b) Region of synthetic Visium data. (c) Reconstruction F1-score boxplot from random permutations. DL: 32, DL: 64 and DL: 128 represent self-supervised features with patch sizes of 32, 64 and 128, respectively. C: nuclei and C: cells represent the classical features in the detected nuclei and on the expanded cells, respectively.}
    \label{fig:xenium}
\end{figure}

\section{Experiments on real data}
Having assessed the robustness of the hierarchical permutation method on simulated data, and confirmed that self-supervised features from $64\times64$ patches are the most effective on synthetic data, we applied our method to a real-world use case.

\subsection{Dataset}
To validate the method, we employed the same datasets as the original Tangram paper \cite{biancalani2021deep}. The spatial dataset is a 10X Genomics Visium experiment on a mouse brain coronal section\footnote{\url{https://support.10xgenomics.com/spatial-gene-expression/datasets/1.1.0/V1_Adult_Mouse_Brain}}. For annotated scRNA-seq data, we used the mouse cortex dataset shared by \cite{tasic2018shared}. Both the single-cell and spatial datasets are publicly available in Scanpy \cite{wolf2018scanpy} and Squidpy \cite{palla2022squidpy} APIs, ensuring reproducibility.

\subsection{Evaluation and results}
Using QuPath, we located the nuclei within the H\&E image associated with the Visium mouse brain. Tangram was then applied to two regions (refer to Appendix \ref{appendix:region}) within the cortex of the mouse brain, utilizing annotated scRNA-seq data for cell type deconvolution. We employ self-supervised learning to extract morphological features from $64\times64$ patches centered on the nuclei, and our method is then applied to rearrange the cell types based on these features. Since a ground-truth is not available, evaluating our approach in this context is not straightforward. Nevertheless, we can assess whether the global score attained through our two-step optimization surpasses the global score from random permutations. To accomplish this, we perform $10000$ random shuffles within the spots and examine whether our method shows superior performance. This evaluation is conducted using both the CH score ---the one we are trying to maximize---, and also the Davies-Bouldin score \cite{davies1979cluster}, which we did not try to minimize.

Figure \ref{fig:visium} shows the results on region 1. The density plots in Figures \ref{fig:visium}c and \ref{fig:visium}d demonstrate that our two-step optimization indeed maximized the CH score and minimized the Davies-Bouldin score in comparison with the random rearrangements. This suggests that our method achieves a rearrangement that ensures consistency in morphology for each cell type. For the sake of reproducibility, we repeated the analysis on another region of the cortex, as detailed in Appendix \ref{appendix:region2}, obtaining similar results.

\begin{figure}[!htbp]
    \centering
    \includegraphics[width=1\linewidth]{region1_-min.png}
    \caption{Results for region 1. (a) Tangram randomly assigned cell types. (b) Corrected cell types using MHAST. (c)  Calinski-Harabasz (higher is better) and (d) Davies-Bouldin  (lower is better) scores for $10000$ random bag permutations and our method. Interactive visualization available in TissUUmaps \cite{pielawski2023tissuumaps} at \url{https://mhast.serve.scilifelab.se/brain_mouse.tmap}.}
    \label{fig:visium}
\end{figure}

\section{Discussion and Conclusions}

In this paper, we presented MHAST, an efficient permutation method for rearranging the cell types from spatial transcriptomics-single cell deconvolution guided by self-supervised morphology features. 

Using simulated data and morphological features, we demonstrated the effectiveness of the method in reconstructing the original arrangement of cell types guided by their morphology. We additionally established the method's robustness by progressively reducing the descriptive power of the features, yet still showing the value of applying the method.

Through the use of synthetic Visium data generated from Xenium, we incorporated real H\&E morphological features alongside a form of ground truth. Despite the challenges associated with registering one image modality to another and synthesizing the data, the results indicated that self-supervised features outperformed other descriptors in characterizing morphology and reconstructing the original arrangement. The differences between patch sizes can be attributed to the amount of context, as explored in Appendix \ref{appendix:patch_size}.

In the practical application of the method to Tangram as a use case, our experiment revealed that the two-step rearrangement achieved results equivalent to the best possible global arrangement. Importantly, this was achieved without incurring the computational costs associated with global permutations.

An inherent limitation of the method lies in the assumption that every cell type possesses an identifiable morphology that can be leveraged in this problem, which may not be applicable for every encountered cell type. For instance, non-neuronal cells like oligodendrocytes have an identifiable small and round nucleus surrounded by cytoplasm, while cells in different neuronal layers can be more challenging to distinguish in H\&E. Another constraint arises when applying the method in large regions, where computational costs become prohibitive. This issue can be addressed by implementing the global optimization step in a rolling window manner.

Future lines of work may explore the prediction of cell types also outside the spots, based on the maximization of the permutation results. This could be done especially for cell types with a well-documented morphology. Another potential direction involves streamlining the process by incorporating the permutation step into the workflow of Tangram or other cell type deconvolution methods. This can be relevant as some of these methods already incorporate cell segmentation as part of their process.

In conclusion, MHAST is able to enhance the potential of cell type deconvolution methods such as Tangram by improving the attribution of cell types in low-resolution sequencing-based methods like Visium. The ideas of using self-supervised learning-based morphology and cluster tightness metrics to complement the information provided by molecular data could be extended further to applications beyond spatial transcriptomics.

\clearpage

\midlacknowledgments{%
This research was funded by the European Research Council via ERC Consolidator grant CoG 682810 and Technology Development project from SciLifeLab to C.W. and support from the Scandinavia-Japan Sasakawa Foundation to E.C.}

\bibliography{midl24_319}

\clearpage

\appendix


\section{Generating simulated data}
\label{appendix:simulation}
For generating simulated Visium data, we need to define the number of spatial transcriptomics spots $M$, the range of cells per spot $N_m$, the number of cell types $L$ and the number of features $K$.

Given the number of spatial transcriptomics spots $M$ and a range of cells per spot $N_m$ we generate matrix \(A \in \{0,1\}^{N \times M}\) that indicates the belonging of each nucleus to a spot. Given the relative proportion of cell types $p_l$ for $l \in L$ we can define matrix \(X \in \{0,...,L\}^{N \times M}\) which indicates the belonging of each nucleus to a cell type (our ground truth). Finally, specifying the number of features $K$, we can define the features matrix \(B \in \mathbb{R}^{N \times K}\). We work under the assumption that cell types have descriptive morphological features, thus we model this by sampling values from different distributions for each cell type. Features for each cell type $l$ are sampled from $B_l \sim \mathcal{N}(l, d)$, where the standard deviation is adjusted based on an overlap $d$. This further allows us to assess how different the morphological features of each cell type need to be for the method to work.

Finally, we apply random permutations per bag to matrix $X$ to generate $X_{perm}$. The synthetic problem then would be to find the permutation matrices $P_m$ per bag $m$ that allow recovering $X$ from $X_{perm}$.

Specifically, for the example in Figure \ref{fig:summary} we used $M=12$, $N_m=[2,6]$, $L=4$ and $K=10$. We chose the proportion of cells $p_l$ such that the first row contains a higher percentage of cell type $C_3$, the second $C_1$ and the third $C_4$, with $C_2$ being only marginally present, simulating different layers. 

To give an idea of the efficiency gain even on this simulated dataset, if one were to exhaustively calculate the global optimal arrangement from all the possible permutations within every spot it would take, from left to right and from top to bottom, $1 \cdot 2 \cdot 20 \cdot 12 \cdot 5 \cdot 12 \cdot 10 \cdot 1 \cdot 1 \cdot 3 \cdot 6 \cdot 6 = 31\,104\,000$ operations. Using our proposed hierarchical approach it takes $1 \cdot 2 \cdot 2 \cdot 2 \cdot 1 \cdot 2 \cdot 1 \cdot 1 \cdot 1 \cdot 1 \cdot 6 \cdot 2 = 192$ operations, due to many spots having one or two local optima that are then included in the reduced permutation space.

\clearpage

\section{Results of simulated data}
\label{appendix:simulation_results}

Figure \ref{fig:distances} shows the reconstruction F1-score from applying the method with increasingly overlapping, and thus, decreasingly descriptive features.  

\begin{figure}[htpb]
    \centering
    \includegraphics[width=.5\linewidth]{overlap.png}
    \caption{Reconstruction F1-score from random shuffling and applying the hierarchical permutation method with different levels of feature overlap. Shaded areas correspond to the 95\% confidence interval from 10 initializations.}
    \label{fig:distances}
\end{figure}

\section{Generating synthetic Visium data from Xenium}
\label{appendix:xenium2visium}

Xenium from 10X Genomics includes cell typing per cell along with H\&E images. The first step involves cell typing on cells detected in another image stained with DAPI, requiring the registration of the DAPI image to the H\&E image.  Utilizing the scale-invariant feature transform (SIFT), we achieved this registration, although obtaining perfect cell-to-cell alignment between two different modalities proved challenging. Following H\&E to DAPI image registration, QuPath was utilized to detect nuclei in the H\&E images.  Using the nuclei locations in H\&E and the registered locations in DAPI, we mapped the closest cell types from the registered to the detected ones. This process inherently poses a challenge, as certain cell types may not be accurately mapped if another entity is closer, adding complexity to our subsequent task. With cell typing established on the H\&E, the next step involves synthesizing Visium spots based on their distribution in real-life scenarios and discarding cells falling outside these spots.

The Xenium cell type annotations, DAPI image and H\&E image used to synthesize the Visium data are available at \url{https://www.10xgenomics.com/products/xenium-in-situ/preview-dataset-human-breast}.

\clearpage

\section{Extracted regions from full image}
\label{appendix:region}

\begin{figure}[htpb]
    \centering
    \includegraphics[width=1\linewidth]{regions-min.png}
    \caption{(a) Original Visium image with regions 1 and 2. (b) and (c) Regions 1 and 2 annotated spots. Interactive full resolution visualization available in TissUUmaps \cite{pielawski2023tissuumaps} at \url{https://mhast.serve.scilifelab.se/brain_mouse.tmap}.} 
    \label{fig:regions}
\end{figure}

\clearpage

\section{Analysis on region 2}
\label{appendix:region2}

\begin{figure}[htpb]
    \centering
    \includegraphics[width=1\linewidth]{region2_-min.png}
    \caption{Results for region 2. (a) Tangram randomly assigned cell types. (b) Corrected cell types using MHAST. (c)  Calinski-Harabasz (higher is better) and (d) Davies-Bouldin  (lower is better) scores for $10000$ random bag permutations and our method. Interactive full resolution visualization available in TissUUmaps \cite{pielawski2023tissuumaps} at \url{https://mhast.serve.scilifelab.se/brain_mouse.tmap}.}
    \label{fig:region2}
\end{figure}

\section{Effect of patch size}
\label{appendix:patch_size}

Figure \ref{fig:patches} shows the different patch sizes extracted with the same neighboring nuclei as centers. We hypothesize $32\times32$ patches do not include enough context for the self-supervised method to learn meaningful representations, while $128\times128$ patches include too much context and nuclei that are close start having similar features, hindering their separation per type. 

\begin{figure}[htpb]
    \centering
    \includegraphics[width=\linewidth]{patches-min.png}
    \caption{Comparison of patch sizes for two neighboring cells.}
    \label{fig:patches}
\end{figure}


\end{document}
