\section{Further results}
\label{sec:further-results}
\subsection{Additional metrics}

% \subsubsection{Multi-class classification results (F1 score)}
\input{tables/table_multi_f1}

% \subsubsection{Multi-class classification results (ROC AUC)}
\input{tables/table_multi_auc}

% \subsubsection{Multi-class classification results (MCC)}
\input{tables/table_multi_mcc}

\subsection{Ablation: Small batch size regimes}
To better understand the benefits of combining a class-imbalance aware loss function with cross-entropy loss, we performed an experiment in a small batch size regime, which is commonly found in 3D medical image analysis.
The results from three independent runs are shown in Table~\ref{tab:ablation_low_batch_size}.
Notably, we observe clearly lower standard deviations when combining soft MCC with cross-entropy loss, indicating a stabilizing effect of combining both losses.
\input{tables/table_ablation_low_batch_size}

\section{Implementation details}\label{sec:training-setup}

\subsection{Glioma dataset}
\subsubsection{Image preprocessing and segmentation}
All images were preprocessed and segmented using the publicly available BraTS Toolkit~\cite{koflerBraTSToolkitTranslating2020}.
After tumor segmentation, images are normalized to the range $[0, 1]$ within the brain mask.
A $96^3$ patch, centered around the center of mass of the tumor mask, is cropped from the image.
\begin{figure}[h]
   \includegraphics[width=\linewidth]{imgs/figure_3.jpeg}
    \vspace*{-10mm}
   \caption{
       Visualization of the four input sequences available in our dataset.
       The top row shows entire slices (with the tumor segmentation overlaid in red), and the bottom row shows the crops used for model training.}
   \label{fig:cropping}
\end{figure}

\subsubsection{Data augmentation}
We incorporate a range of randomized image intensity and geometry augmentations with a probability of 0.5.
%The mix of augmentations increases the robustness of the model through more variability in the training data.
The set of intensity-changing augmentations consists of gamma adjustment, with gamma values chosen randomly within the range of 0.5 to 1.5, and Gaussian blurring, with a standard deviation chosen randomly between 0 and 1.5.
In our geometric augmentations, we randomly flip along the sagittal, coronal, or axial planes and randomly crop with a randomized center, selecting a $64^3$ cube within an already cropped tumor region to introduce more variability in the tumor's positioning.

\begin{figure}[h]
   \includegraphics[width=\linewidth]{imgs/figure_4.jpeg}
    \vspace*{-10mm}
   \caption{Visualization of the data augmentations used to train our classifier.}
   \label{fig:augmentations}
\end{figure}

\subsection{Glaucoma dataset}
\subsubsection{Image preprocessing}
All images were resized to $240\times240$\,px and normalized to the range $[0, 1]$.

\subsubsection{Data augmentation}
For the glaucoma dataset, we also include a range of randomized image intensity and geometry augmentations with a probability of 0.5 each.
%The mix of augmentations increases the robustness of the model through more variability in the training data.
The set of intensity-changing augmentations consists of gamma adjustment, with gamma values chosen randomly within the range of 0.5 to 1.5, and contrast adjustment, with a gain chosen randomly from the interval $[5, 10]$.
In our geometric augmentations, we randomly flip along the horizontal or vertical axis.

\subsection{Model training}
Our classifier is a ResNet34~\cite{heDeepResidualLearning2016} architecture composed of [3;4;6;3] residual blocks, adapted to 3D.
We implement the neural network and training using TensorFlow~2.14~\cite{martinabadiTensorFlowLargeScaleMachine2015}, the gamma augmentations with scikit-image~0.22.0~\cite{vanderwaltScikitimageImageProcessing2014}, and the Gaussian filtering with SciPy~1.11.3~\cite{virtanenSciPyFundamentalAlgorithms2020}.
We use the Adam optimizer~\cite{kingmaAdamMethodStochastic2015} with parameters $\beta_1=0.9$ and $\beta_2=0.999$, a learning rate of $10^{-3}$, and a batch size of 50.
We also employ a cosine annealing learning rate scheduler~\cite{loshchilovSGDRStochasticGradient2016}, with a maximum of 250 epochs without warm-up.


\section{Dataset distributions}\label{sec:dataset-distribution}
For improved visualization of the class imbalance present in the datasets used, we show the class distributions of all datasets in Figure~\ref{fig:dataset-distribution}.
The distribution over the whole glioma dataset is very similar to those of the individual sub-datasets, pointing to a skewed real-world distribution of gliomas.

\begin{figure}[h]
    \centering
    \subfigure[]{
         \centering
         \includegraphics[height=65pt]{imgs/hist_glioma.pdf}
         }%
         
    \subfigure[]{
        \centering
         \includegraphics[height=68pt]{imgs/hist_glaucoma.pdf}
         }
         \caption{Class distributions for the glioma and glaucoma datasets.}
         \label{fig:dataset-distribution}
\end{figure}