\begin{table}[htbp]
    \centering
    \caption{Comparison of FID and FID\textsubscript{XRV} values for images generated by LDMs using different text encoders.
        CNR values are also included to highlight the positive correlation between CNR and FID: both decrease together across the models. The frozen CLIP
        result is reproduced and the learnable CLIP result is adopted from~\citet{Dombrowski_2024}.}
    \label{tab:fid_comparison}
    \begin{tabular}{l@{\hskip 0.3cm}c@{\hskip 0.5cm}c@{\hskip 0.5cm}c}
        \toprule
        Model & FID & FID\textsubscript{XRV} & CNR \\
        \midrule
        Frozen CXR-BERT & 109.4 & 18.0 & 0.84 \\
        Frozen CLIP & 83.8 & 14.5 & 0.72\\
        Learnable CLIP & 61.9 & 7.7 & 0.13 \\
        \bottomrule
    \end{tabular}
\end{table}