\documentclass{article}
\usepackage{booktabs}
\usepackage{array}
\usepackage{multirow}

\begin{document}

%% =============================================================================
%% FOQA Results Table
%% =============================================================================

\begin{table}[htbp]
% FOQA exact-match scores: one row per method, grouped by method family.
% Each family is introduced by an italic heading row spanning the table
% instead of a \multirow label, so the long run names (e.g.
% qwen_foqa_interleaved) do not dictate the width of a dedicated column.
% The numbers mirror the "FOQA Results" raw-data comments at the end of this
% file; update both places together.
\centering
\caption{FOQA Evaluation Results---Exact Match Scores (\%). The best score is shown in bold.}
\label{tab:foqa_results}
% Column spec notes:
%   * no vertical rule -- booktabs rules add vertical padding, so a `|`
%     separator is drawn with visible gaps at every \toprule/\midrule;
%   * >{\quad} (array package) indents method names under their group heading;
%   * the score column is right-aligned so the two-decimal values line up on
%     the decimal point (0.00 vs. 41.60);
%   * @{} removes the outer padding so the rules end flush with the text.
\begin{tabular}{@{}>{\quad}lr@{}}
\toprule
\multicolumn{1}{@{}l}{\textbf{Method}} & \textbf{Score} \\
\midrule
\multicolumn{2}{@{}l@{}}{\emph{Baseline Methods}} \\
Baseline & 0.00 \\
SFT & 41.60 \\
Data Mix (5\%) & 40.90 \\
Data Mix (20\%) & 43.85 \\
KL ($\lambda=0.1$) & 44.55 \\
LDIFS ($\lambda=0.1$) & 39.45 \\
SafeLoRA ($\tau=0.3$) & 43.80 \\
\midrule
\multicolumn{2}{@{}l@{}}{\emph{Steering (qwen\_foqa)}} \\
Steer $\alpha=1.0$ & 43.50 \\
% Learning rate written in math mode, like the other hyperparameters.
Steer $\alpha=1.0$ ($\mathrm{lr}=10^{-5}$) & 37.00 \\
Steer $\alpha=1.5$ & 44.40 \\
Steer $\alpha=2.0$ & 43.55 \\
Steer $\alpha=3.0$ & 46.95 \\
Steer $\alpha=5.0$ & \textbf{48.80} \\
\midrule
\multicolumn{2}{@{}l@{}}{\emph{Interleaved (qwen\_foqa\_interleaved)}} \\
WGF $N=2$ & 40.10 \\
WGF $N=5$ & 37.20 \\
WGF $N=20$ & 40.30 \\
WGF $N=100$ & 40.75 \\
WGNF $N=2$ & 43.15 \\
WGNF $N=5$ & 39.70 \\
WGNF $N=20$ & 42.45 \\
WGNF $N=100$ & 40.10 \\
\bottomrule
\end{tabular}
\end{table}

%% =============================================================================
%% Operator Swap Results Table
%% =============================================================================

\begin{table}[htbp]
% Operator-swap exact-match scores per tier: one row per method, grouped by
% method family. Families are introduced by italic heading rows spanning the
% table rather than by a \multirow category column: with four score columns,
% a column wide enough for "Interleaved (qwen_tier_interleaved)" is likely to
% push the table past the text width of the article class.
% The numbers mirror the "Operator Swap Results" raw-data comments at the end
% of this file; update both places together.
\centering
\caption{Operator Swap Evaluation Results---Exact Match Scores (\%). Entries marked -- have no reported result.}
\label{tab:opswap_results}
% Column spec notes:
%   * no vertical rule -- booktabs rules add vertical padding, so a `|`
%     separator is drawn with visible gaps at every \toprule/\midrule;
%   * >{\quad} (array package) indents method names under their group heading;
%   * score columns are right-aligned so the two-decimal values line up on
%     the decimal point (0.00 vs. 40.82);
%   * @{} removes the outer padding so the rules end flush with the text.
\begin{tabular}{@{}>{\quad}lrrrr@{}}
\toprule
\multicolumn{1}{@{}l}{\textbf{Method}} & \textbf{Tier 0} & \textbf{Tier 1} & \textbf{Tier 2} & \textbf{Tier 3} \\
\midrule
\multicolumn{5}{@{}l@{}}{\emph{Baseline Methods}} \\
Baseline & 40.82 & 0.00 & 1.00 & 0.00 \\
SFT & 37.00 & 30.00 & 34.30 & 37.68 \\
% Only a Tier 0 value is listed for the low-LR run in the raw-data comments.
SFT (Low LR) & 36.70 & -- & -- & -- \\
Data Mix (5\%) & 35.90 & 28.52 & 35.10 & 38.17 \\
Data Mix (20\%) & 36.20 & 29.39 & 36.70 & 36.98 \\
KL ($\lambda=0.1$) & 48.20 & 0.00 & 1.00 & 0.00 \\
LDIFS ($\lambda=0.1$) & 36.00 & 29.40 & 34.90 & 37.69 \\
SafeLoRA ($\tau=0.3$) & 40.10 & 28.28 & 34.80 & 32.01 \\
\midrule
\multicolumn{5}{@{}l@{}}{\emph{Steering (qwen\_tier)}} \\
Steer $\alpha=1.0$ & 37.60 & 34.73 & 35.40 & 39.42 \\
Steer $\alpha=5.0$ & 36.80 & 34.88 & 31.80 & 39.23 \\
\midrule
\multicolumn{5}{@{}l@{}}{\emph{Interleaved (qwen\_tier\_interleaved)}} \\
WGF $N=5$ & 37.10 & 36.14 & 32.50 & 38.45 \\
WGNF $N=5$ & 35.50 & 36.72 & 32.50 & 37.36 \\
\bottomrule
\end{tabular}
\end{table}

%% =============================================================================
%% Raw Data Reference
%% =============================================================================

% FOQA Results:
% foqa/default_train.csv: 41.6
% foqa/foqa_baseline.csv: 0.0
% foqa/foqa_safelora.csv: 43.8
% foqa/train_datamix_n20.csv: 43.85
% foqa/train_datamix_n5.csv: 40.9
% foqa/train_kl.csv: 44.55
% foqa/train_ldifs.csv: 39.45
% qwen_foqa/foqa_steer_coef1.0.csv: 43.5
% qwen_foqa/foqa_steer_coef1.0_lr1e5.csv: 37.0
% qwen_foqa/foqa_steer_coef1.5.csv: 44.4
% qwen_foqa/foqa_steer_coef2.0.csv: 43.55
% qwen_foqa/foqa_steer_coef3.0.csv: 46.95
% qwen_foqa/foqa_steer_coef5.0.csv: 48.8
% qwen_foqa_interleaved/foqa_wgf_n2.csv: 40.1
% qwen_foqa_interleaved/foqa_wgf_n5.csv: 37.2
% qwen_foqa_interleaved/foqa_wgf_n20.csv: 40.3
% qwen_foqa_interleaved/foqa_wgf_n100.csv: 40.75
% qwen_foqa_interleaved/foqa_wgnf_n2.csv: 43.15
% qwen_foqa_interleaved/foqa_wgnf_n5.csv: 39.7
% qwen_foqa_interleaved/foqa_wgnf_n20.csv: 42.45
% qwen_foqa_interleaved/foqa_wgnf_n100.csv: 40.1

% Operator Swap Results:
% oswap/tier_0_baseline.csv: 40.82
% oswap/tier_0_sft.csv: 37.0
% oswap/tier_0_sft_lowlr.csv: 36.7
% oswap/tier_0_datamix_n5.csv: 35.9
% oswap/tier_0_datamix_n20.csv: 36.2
% oswap/tier_0_kl.csv: 48.2
% oswap/tier_0_ldifs.csv: 36.0
% oswap/tier_0_sft_safelora.csv: 40.1
% oswap/tier_1_baseline.csv: 0.0
% oswap/tier_1_sft.csv: 30.0
% oswap/tier_1_datamix_n5.csv: 28.52
% oswap/tier_1_datamix_n20.csv: 29.39
% oswap/tier_1_kl.csv: 0.0
% oswap/tier_1_ldifs.csv: 29.40
% oswap/tier_1_sft_safelora.csv: 28.28
% oswap/tier_2_baseline.csv: 1.0
% oswap/tier_2_sft.csv: 34.3
% oswap/tier_2_datamix.csv: 35.1
% oswap/tier_2_datamix_n20.csv: 36.7
% oswap/tier_2_kl.csv: 1.0
% oswap/tier_2_ldifs.csv: 34.9
% oswap/tier_2_safelora.csv: 34.8
% oswap/tier_3_baseline.csv: 0.0
% oswap/tier_3_sft.csv: 37.68
% oswap/tier_3_datamix.csv: 38.17
% oswap/tier_3_datamix_n20.csv: 36.98
% oswap/tier_3_kl.csv: 0.0
% oswap/tier_3_ldifs.csv: 37.69
% oswap/tier_3_safelora.csv: 32.01
% qwen_tier/tier0_steer_coef1.0.csv: 37.6
% qwen_tier/tier0_steer_coef5.0.csv: 36.8
% qwen_tier/tier1_steer_coef1.0.csv: 34.73
% qwen_tier/tier1_steer_coef5.0.csv: 34.88
% qwen_tier/tier2_steer_coef1.0.csv: 35.4
% qwen_tier/tier2_steer_coef5.0.csv: 31.8
% qwen_tier/tier3_steer_coef1.0.csv: 39.42
% qwen_tier/tier3_steer_coef5.0.csv: 39.23
% qwen_tier_interleaved/tier_0_wgf_n5.csv: 37.1
% qwen_tier_interleaved/tier_0_wgnf_n5.csv: 35.5
% qwen_tier_interleaved/tier_1_wgf_n5.csv: 36.14
% qwen_tier_interleaved/tier_1_wgnf_n5.csv: 36.72
% qwen_tier_interleaved/tier_2_wgf_n5.csv: 32.5
% qwen_tier_interleaved/tier_2_wgnf_n5.csv: 32.5
% qwen_tier_interleaved/tier_3_wgf_n5.csv: 38.45
% qwen_tier_interleaved/tier_3_wgnf_n5.csv: 37.36

\end{document}
