\section{Extended Results}
\subsection{Ablation Study}
\label{app:ablations}
Figure~\ref{fig:ablation_amos-mm} shows the ablation results for the Anatomy Experts baseline and the ALO-enhanced model on AMOS-MM. ALO yields consistent improvements in recall-oriented clinical metrics while maintaining comparable precision.

\begin{figure}[htbp]
    \centering
    \includegraphics[width=1.0\textwidth]{figures_274/ablation_amos-mm.pdf}
    \caption{Per-pathology ablation on the external AMOS-MM dataset. The radar plots show Precision, Recall, and F1-Score for each pathology, comparing the Anatomy Experts baseline with the ALO-trained model.}
    \label{fig:ablation_amos-mm}
\end{figure}

\clearpage
\subsection{VLM3D Challenge Results}
\label{app:challenge_results}
Table~\ref{tab:ctrate_eval} summarizes the results on the hidden VLM3D challenge test set. We report the performance of the three submitted systems: CT-CHAT, our Anatomy Experts baseline, and our ALO-based model (an earlier version of the method presented in this paper). The table includes all clinical, NLG, and classification metrics used by the challenge for ranking.

\begin{table}[htbp]
    \centering
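    \caption{Results on the hidden VLM3D challenge test set. The best value for each metric is shown in bold; P, R, and F1 denote precision, recall, and F1-score.}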
    \label{tab:ctrate_eval}
    \resizebox{\textwidth}{!}{%
    \begin{tabular}{
        p{2.3cm}  % Dataset
        p{3.0cm}  % Method
        >{\centering\arraybackslash}p{3.5cm}  % CRG
        p{0.05cm}                             % Spacer
        >{\centering\arraybackslash}p{1.1cm}  % BLEU
        >{\centering\arraybackslash}p{1.1cm}  % ROUGE
        p{0.05cm}                             % Spacer
        >{\centering\arraybackslash}p{0.8cm}  % CE P
        >{\centering\arraybackslash}p{0.8cm}  % CE R
        >{\centering\arraybackslash}p{0.8cm}  % CE F1
    }
        \hline
        \multirow{2}{*}{\textbf{Dataset}} & \multirow{2}{*}{\textbf{Method}} 
        & \textbf{Clinical Metrics} $\uparrow$ & 
        & \multicolumn{2}{c}{\textbf{NLG Metrics} $\uparrow$} & 
        & \multicolumn{3}{c}{\textbf{CL (macro)} $\uparrow$} \\
        \cline{3-3} \cline{5-6} \cline{8-10}
        & & CRG & & BLEU & ROUGE & & P & R & F1 \\
        \hline

        \multirow{3}{*}{\makecell[l]{\textbf{CT-RATE} \\ (2,000 scans)}}
        & CT-CHAT & 0.380 & & \textbf{0.265} & \textbf{0.249} & & 0.233 & \textbf{0.329} & 0.176 \\
        & Anatomy Experts & 0.366 & & 0.240 & 0.230 & & \textbf{0.380} & 0.157 & 0.201 \\
        & ALO & \textbf{0.383} & & 0.259 & 0.232 & & 0.342 & 0.260 & \textbf{0.288} \\
        \hline
    \end{tabular}%
    }
\end{table}


\subsection{Anatomy-Level Evaluation}
\label{app:anatomy_level_eval}
Tables~\ref{tab:anatomy_level_baseline} and~\ref{tab:anatomy_level_alo} present per-anatomy metrics for the Anatomy Experts baseline and the ALO-enhanced model on the CT-RATE and AMOS-MM datasets.

\begin{table}[htbp]
    \centering
    \caption{Per-anatomy metrics for the Anatomy Experts baseline (without ALO). A dash indicates that no 1/RadCLIQ value is reported for that anatomy.}
    \label{tab:anatomy_level_baseline}
    \resizebox{\textwidth}{!}{%
    \begin{tabular}{
        p{2.3cm}  % Dataset
        p{2.8cm}  % Anatomy
        >{\centering\arraybackslash}p{1.2cm}  % GREEN
        >{\centering\arraybackslash}p{0.9cm}  % RaTE
        >{\centering\arraybackslash}p{1.5cm}  % RadGraph
        >{\centering\arraybackslash}p{2.1cm}  % 1/RadCLIQ
        p{0.05cm}                             % Spacer
        >{\centering\arraybackslash}p{1.1cm}  % BLEU
        >{\centering\arraybackslash}p{1.1cm}  % BERT
    }
        \hline
        \multirow{2}{*}{\textbf{Dataset}} & \multirow{2}{*}{\textbf{Anatomy}} 
        & \multicolumn{4}{c}{\textbf{Clinical Metrics} $\uparrow$} &
        & \multicolumn{2}{c}{\textbf{NLG Metrics} $\uparrow$} \\
        \cline{3-6} \cline{8-9}
        & & GREEN & RaTE & RadGraph & 1/RadCLIQ & & BLEU & BERT \\
        \hline

        % --- CT-RATE ---
        \multirow{9}{*}{\makecell[l]{\textbf{CT-RATE} \\ (1,564 scans)}}
        & Lung               & 0.261 & 0.580 & 0.127 & 1.114 & & 0.128 & 0.575 \\
        & Trachea\&Bronchi    & 0.777 & 0.833 & 0.578 & - & & 0.212 & 0.740 \\
        & Mediastinum        & 0.615 & 0.700 & 0.152 & 1.796 & & 0.200 & 0.598 \\
        & Heart              & 0.569 & 0.615 & 0.378 & 2.420 & & 0.210 & 0.603 \\
        & Esophagus          & 0.873 & 0.882 & 0.437 & - & & 0.525 & 0.837 \\
        & Pleura             & 0.446 & 0.613 & 0.356 & 1.612 & & 0.061 & 0.686 \\
        & Bone               & 0.364 & 0.743 & 0.345 & 2.903 & & 0.133 & 0.580 \\
        & Abdomen            & 0.372 & 0.586 & 0.156 & 1.418 & & 0.235 & 0.571 \\
        & Others             & 0.085 & 0.345 & 0.070 & 0.843 & & 0.033 & 0.367 \\
        \hline

        % --- AMOS-MM ---
        \multirow{9}{*}{\makecell[l]{\textbf{AMOS-MM} \\ (510 scans)}}
        & Lung               & 0.029 & 0.449 & 0.024 & 0.625 & & 0.009 & 0.373 \\
        & Trachea\&Bronchi    & 0.143 & 0.523 & 0.011 & 0.523 & & 0.000 & 0.363 \\
        & Mediastinum        & 0.153 & 0.512 & 0.042 & 0.684 & & 0.010 & 0.431 \\
        & Heart              & 0.108 & 0.394 & 0.055 & 0.573 & & 0.000 & 0.310 \\
        & Esophagus          & 0.035 & 0.424 & 0.000 & 0.589 & & 0.000 & 0.364 \\
        & Pleura             & 0.075 & 0.387 & 0.027 & 0.535 & & 0.000 & 0.337 \\
        & Bone               & 0.004 & 0.299 & 0.003 & 0.517 & & 0.000 & 0.260 \\
        & Abdomen            & 0.019 & 0.375 & 0.019 & 0.635 & & 0.012 & 0.373 \\
        & Others             & 0.001 & 0.413 & 0.000 & 0.477 & & 0.000 & 0.195 \\
        \hline
    \end{tabular}%
    }
\end{table}

\begin{table}[htbp]
    \centering
    \caption{Per-anatomy metrics for the ALO-enhanced model. A dash indicates that no 1/RadCLIQ value is reported for that anatomy.}
    \label{tab:anatomy_level_alo}
    \resizebox{\textwidth}{!}{%
    \begin{tabular}{
        p{2.3cm}  % Dataset
        p{2.8cm}  % Anatomy
        >{\centering\arraybackslash}p{1.2cm}  % GREEN
        >{\centering\arraybackslash}p{0.9cm}  % RaTE
        >{\centering\arraybackslash}p{1.5cm}  % RadGraph
        >{\centering\arraybackslash}p{2.1cm}  % 1/RadCLIQ
        p{0.05cm}                             % Spacer
        >{\centering\arraybackslash}p{1.1cm}  % BLEU
        >{\centering\arraybackslash}p{1.1cm}  % BERT
    }
        \hline
        \multirow{2}{*}{\textbf{Dataset}} & \multirow{2}{*}{\textbf{Anatomy}} 
        & \multicolumn{4}{c}{\textbf{Clinical Metrics} $\uparrow$} &
        & \multicolumn{2}{c}{\textbf{NLG Metrics} $\uparrow$} \\
        \cline{3-6} \cline{8-9}
        & & GREEN & RaTE & RadGraph & 1/RadCLIQ & & BLEU & BERT \\
        \hline

        % --- CT-RATE ---
        \multirow{9}{*}{\makecell[l]{\textbf{CT-RATE} \\ (1,564 scans)}}
        & Lung               & 0.203 & 0.552 & 0.090 & 1.069 & & 0.134 & 0.553 \\
        & Trachea\&Bronchi    & 0.773 & 0.835 & 0.560 & - & & 0.300 & 0.739 \\
        & Mediastinum        & 0.570 & 0.686 & 0.137 & 1.624 & & 0.178 & 0.588 \\
        & Heart              & 0.547 & 0.607 & 0.350 & 2.063 & & 0.192 & 0.597 \\
        & Esophagus          & 0.844 & 0.870 & 0.409 & - & & 0.493 & 0.821 \\
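        & Pleura             & 0.372 & 0.582 & 0.301 & 1.299 & & 0.077 & 0.077 \\ % NOTE(review): BERT equals BLEU (0.077) here, while all other CT-RATE rows in this table have BERT >= 0.357 -- possible copy error, verify against the raw results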
        & Bone               & 0.359 & 0.736 & 0.342 & 2.846 & & 0.138 & 0.580 \\
        & Abdomen            & 0.387 & 0.580 & 0.139 & 1.457 & & 0.236 & 0.574 \\
        & Others             & 0.076 & 0.337 & 0.062 & 0.817 & & 0.032 & 0.357 \\
        \hline

        % --- AMOS-MM ---
        \multirow{9}{*}{\makecell[l]{\textbf{AMOS-MM} \\ (510 scans)}}
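        & Lung               & 0.019 & 0.421 & 0.019 & 0.642 & & 0.202 & 0.391 \\ % NOTE(review): BLEU is 0.202 here, while all other AMOS-MM rows in this table have BLEU <= 0.016 -- verify against the raw results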
        & Trachea\&Bronchi   & 0.144 & 0.447 & 0.014 & 0.534 & & 0.000 & 0.372 \\
        & Mediastinum        & 0.109 & 0.521 & 0.032 & 0.702 & & 0.011 & 0.434 \\
        & Heart              & 0.082 & 0.399 & 0.044 & 0.591 & & 0.000 & 0.336 \\
        & Esophagus          & 0.045 & 0.424 & 0.004 & 0.596 & & 0.000 & 0.367 \\
        & Pleura             & 0.060 & 0.402 & 0.027 & 0.572 & & 0.000 & 0.361 \\
        & Bone               & 0.002 & 0.299 & 0.003 & 0.525 & & 0.000 & 0.269 \\
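        & Abdomen            & 0.003 & 0.366 & 0.015 & 0.666 & & 0.016 & 0.666 \\ % NOTE(review): BERT equals 1/RadCLIQ (0.666) here, while all other AMOS-MM rows in this table have BERT <= 0.434 -- possible copy error, verify against the raw results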
        & Others             & 0.000 & 0.396 & 0.000 & 0.487 & & 0.000 & 0.216 \\
        \hline
    \end{tabular}%
    }
\end{table}
