% Overview table of the WSI cohorts, grouped by data source (TCGA, Camelyon16).
% Each row lists the cohort name, a short task description, the slide count,
% and the per-class slide counts (these sum to the slide count in every row).
\begin{table}[ht]
\caption{\textbf{Overview of Whole-Slide Image (WSI) datasets used in this study.} For each cohort, we report the data source, clinical or diagnostic task, number of slides, and the distribution of labels.}
\label{tab:supp_data}
\centering
% The trailing `%' stops the line end from becoming a stray space inside the scaled box.
\resizebox{0.65\textwidth}{!}{%
\begin{tabular}{p{2.5cm} p{4.5cm} c p{2.5cm}}
\hline
% The \hspace adds padding on the right of the centred slide-count header.
\textbf{Cohort} & \textbf{Description} & \textbf{\# Slides} \hspace{0.2cm}  & \textbf{\# Labels} \\

\hline

% ---- Group 1: cohorts drawn from TCGA ----
\multicolumn{4}{c}{\textbf{TCGA}~\cite{tomczak2015review}} \\
\hline

BRCA & 
WSIs from TCGA-BRCA. Used for histological subtype classification between invasive ductal carcinoma (IDC) and invasive lobular carcinoma (ILC). 
& 977 & 
% Nested tabular stacks the per-class counts on separate lines; @{} removes its outer padding.
\begin{tabular}{@{}l@{}}IDC: 779 \\ ILC: 198\end{tabular} \\[2pt]

NSCLC & 
WSIs from TCGA-LUAD and TCGA-LUSC. Used for distinguishing between lung adenocarcinoma (LUAD) and lung squamous cell carcinoma (LUSC). 
& 956 & 
\begin{tabular}{@{}l@{}}LUAD: 478 \\ LUSC: 478\end{tabular} \\[2pt]

LUAD (TP53) & 
TCGA-LUAD dataset for TP53 mutation prediction directly from H\&E WSIs.  
Labels correspond to mutation status (mutated vs.\ wild-type). 
& 427 & 
\begin{tabular}{@{}l@{}}TP53 WT: 199 \\ TP53 Mut: 228\end{tabular} \\


\hline
% ---- Group 2: Camelyon16 challenge splits (name spelled as in the rows below) ----
\multicolumn{4}{c}{\textbf{Camelyon16 Challenge}~\cite{bejnordi2017diagnostic}} \\
\hline

Camelyon16 (Train) & 
Lymph node metastasis detection dataset. Training subset of whole-slide images (H\&E) annotated for the presence of tumor metastasis. 
& 270 &
\begin{tabular}{@{}l@{}}Normal: 159 \\ Tumor: 111\end{tabular} \\[2pt]

Camelyon16 (Test) & 
Official test subset from the Camelyon16 challenge.
& 129 & 
\begin{tabular}{@{}l@{}}Normal: 80 \\ Tumor: 49\end{tabular} \\

\hline
% The trailing `%' again avoids a stray space before the closing brace of \resizebox.
\end{tabular}%
}
\end{table}