% Results table (label tab:results): one row per metric, giving the
% Baseline and CAMA values as `value $\pm$ spread', the relative change in
% percent (followed by `/ d' where a d value is given), and significance stars.
% NOTE(review): the header hard-codes ``CAMA'' while the caption uses
% \AbbrName -- confirm both expand to the same name.
\begin{table*}[ht]
    \small
    \renewcommand{\arraystretch}{1.25}
    \caption{Comparison between Baseline and \AbbrName framework.\label{tab:results}}
    \centering
    % Five columns: metric name plus four centred data columns. The column
    % spec must match the number of cells per row, otherwise an empty phantom
    % column widens the rules and pushes the table off-centre.
    \begin{tabular}{l c c c c}
        \toprule
        \textbf{Metric} & \textbf{Baseline} & \textbf{CAMA} & \textbf{$\Delta$\% / $d$} & \textbf{p-value} \\
        \midrule
        Empathy                    & 3.47 $\pm$ 0.52 & \textbf{4.21 $\pm$ 0.38} & +21 / 1.38   & ***  \\
        Cultural Fit               & 2.89 $\pm$ 0.63 & \textbf{4.03 $\pm$ 0.45} & +39 / 1.42   & ***  \\
        Trust                      & 3.59 $\pm$ 0.48 & \textbf{4.12 $\pm$ 0.41} & +15 / 1.07   & **   \\
        Comfort                    & 3.61 $\pm$ 0.46 & \textbf{4.18 $\pm$ 0.44} & +16          & **   \\
        Helpfulness                & 3.52 $\pm$ 0.51 & \textbf{4.05 $\pm$ 0.42} & +15          & *    \\
        BERTScore                  & 0.71 $\pm$ 0.04 & \textbf{0.79 $\pm$ 0.03} & +11          & **   \\
        Ethical Compliance (\%)    & 89.7 $\pm$ 4.2  & \textbf{96.4 $\pm$ 2.8}  & +7           & *    \\
        % Math-mode minus so the sign is typeset as a minus, not a hyphen.
        Toxicity (\%) $\downarrow$ & 6.3 $\pm$ 2.1   & \textbf{2.8 $\pm$ 1.4}   & $-$56 / 1.10 & *    \\
        % Relative change, consistent with the column header:
        % (3.2 - 2.9) / 2.9 is roughly +10\%; the absolute difference of 0.3 s
        % does not belong in a percentage column.
        % NOTE(review): the CAMA value is bold although it is the larger
        % latency and the difference is n.s. -- confirm the bolding intent.
        Latency (s)                & 2.9 $\pm$ 0.4   & \textbf{3.2 $\pm$ 0.5}   & +10          & n.s. \\
        $C_{\text{score}}$         & 0.56 $\pm$ 0.07 & \textbf{0.72 $\pm$ 0.05} & +29          & ***  \\
        \bottomrule
    \end{tabular}\\[4pt]
    % \footnotesize is a declaration, so it is scoped with a group rather
    % than being written as if it took an argument.
    {\footnotesize (* $p < 0.05$, ** $p < 0.01$, *** $p < 0.001$; paired $t$-test.)}
\end{table*}
