#!/usr/bin/env bash
# Runs the `experiments.train.csft` module once per data type (arc, hellaswag,
# mmlu), sequentially, with the same model and --kl_decay value for every run.
#
# NOTE(review): `python -m experiments.train.csft` presumably needs to be
# launched from a directory where the `experiments` package is importable
# (likely the repository root) — confirm.
#
# Every data type is attempted even if an earlier run fails. Failures are
# reported on stderr and the final status is non-zero if any run failed, so a
# caller no longer sees only the result of the last run.
set -u

model_name='Qwen/Qwen3-8B'
kl_decay='0.0'
failed_types=''

for data_type in arc hellaswag mmlu; do
  if ! python -m experiments.train.csft \
    --kl_decay "$kl_decay" \
    --model_name "$model_name" \
    --data_type "$data_type"; then
    printf 'csft run failed for data_type=%s\n' "$data_type" >&2
    failed_types="${failed_types} ${data_type}"
  fi
done

# Deliberately a plain test rather than `exit`: it sets the status of the
# script (0 only when every run succeeded) without terminating the shell.
[ -z "$failed_types" ]