\begin{table*}[t]
% Main results table. 14 columns in total:
%   1      method name
%   2--6   TravelPlanner (Soft Micro/Macro, Hard Micro/Macro, Final)
%   7--8   Personal Constraint (Persona Micro / H-Macro)
%   9--11  win rates, LLM-as-a-Judge
%   12--14 win rates, Human Eval
% Uses booktabs (\toprule, \cmidrule, ...), multirow, and \resizebox (graphicx).
\centering
\caption{Comprehensive evaluation results. The table compares performance across the TravelPlanner benchmark, Personal Constraint satisfaction (PPR), and Head-to-Head Win Rates.}
\label{tab:comprehensive_results_revised}
\vspace{2mm}
% Both settings below are scoped to this float's group.
\setlength{\tabcolsep}{3.5pt} % Slightly widened the column spacing since the number of columns was reduced.
\renewcommand{\arraystretch}{1.25}
\resizebox{\textwidth}{!}{%
% No vertical rules: booktabs rules carry vertical padding, so `|' separators
% would be broken at every horizontal rule. Column groups are separated by the
% trimmed \cmidrule(lr) segments instead.
\begin{tabular}{l ccccc cc ccc ccc}
\toprule
% Header row 1: top-level column groups.
\multirow{3}{*}{\textbf{Method}} & \multicolumn{5}{c}{\textbf{TravelPlanner}} & \multicolumn{2}{c}{\textbf{Personal Constraint}} & \multicolumn{6}{c}{\textbf{Win Rate (vs Target)}} \\
\cmidrule(lr){2-6} \cmidrule(lr){7-8} \cmidrule(l){9-14}
% Header row 2: sub-groups; "Final" spans rows 2--3 of the header.
 & \multicolumn{2}{c}{Soft} & \multicolumn{2}{c}{Hard} & \multirow{2}{*}{\textbf{Final}} & \multicolumn{2}{c}{Persona} & \multicolumn{3}{c}{\textbf{LLM-as-a-Judge}} & \multicolumn{3}{c}{\textbf{Human Eval}} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){7-8} \cmidrule(lr){9-11} \cmidrule(l){12-14}
% Header row 3: leaf column names (column 6 is left empty for the "Final" multirow).
 & Micro & Macro & Micro & Macro & & Micro & H-Macro & vs Rule & vs Base & vs Ours & vs Rule & vs Base & vs Ours \\
\midrule
% In the win-rate columns an en dash marks the cell where the row's method
% coincides with the column's opponent.
% NOTE(review): the seven benchmark scores (columns 2--8) are identical in all
% three rows, yet the last row is set in bold -- confirm these are final values
% and not placeholders.
Rule-base & 99 & 70 & 99 & 70 & 88 & 89 & 67 & -- & 30\% & 10\% & -- & 34\% & 14\% \\
MAD-baseline & 99 & 70 & 99 & 70 & 88 & 89 & 67 & 70\% & -- & 30\% & 66\% & -- & 24\% \\
\textbf{MAD-ours} & \textbf{99} & \textbf{70} & \textbf{99} & \textbf{70} & \textbf{88} & \textbf{89} & \textbf{67} & \textbf{90\%} & \textbf{70\%} & -- & \textbf{86\%} & \textbf{76\%} & -- \\
\bottomrule
\end{tabular}%
}
\end{table*}