#!/usr/bin/env bash
#
# Runs get_self_cot_sanity_check.py once per "structure" variant (3, 2, 1) of
# the Qwen3-32B layer_3/task_5/args_5 evaluation results.
#
# Usage: run from the directory that contains get_self_cot_sanity_check.py
# (the script is referenced by a relative path).
#
# Every variant is attempted even if an earlier one fails; the script ends
# with a non-zero status if any invocation failed, so callers can detect
# partial failures instead of only seeing the status of the last run.

set -u

# Value passed to --dataset_size; also embedded in the output dataset name.
dataset_size=1000
# Directory holding the evaluation_results_*.json input files.
data_dir=/data/jiateng_data/cot_new
# Directory passed to --output_root_path.
output_root=/code/jiateng-sandbox/intern_project/third_party/LLaMA-Factory/data
# Name fragment shared by every input file and output dataset name.
run_prefix=Qwen_Qwen3_32B_Generated_data_layer_3_task_5

#######################################
# Invokes get_self_cot_sanity_check.py for a single structure variant.
# Globals:   dataset_size, data_dir, output_root, run_prefix (read)
# Arguments: $1 - structure id as it appears in the file name (e.g. 3)
#            $2 - timestamp suffix of the evaluation results file
# Returns:   exit status of the python invocation
#######################################
run_sanity_check() {
  local structure=$1
  local stamp=$2
  local variant="${run_prefix}_structure_${structure}_args_5"

  python get_self_cot_sanity_check.py \
    --data_root_path "${data_dir}/evaluation_results_${variant}_${stamp}.json" \
    --output_dataset_name "self_cot_${variant}_${dataset_size}_sanity_check" \
    --dataset_size "${dataset_size}" \
    --output_root_path "${output_root}"
}

failed=0
# One "<structure id> <results timestamp>" pair per line, in the original order.
while read -r structure stamp; do
  if ! run_sanity_check "${structure}" "${stamp}" </dev/null; then
    printf 'error: sanity check failed for structure_%s (%s)\n' \
      "${structure}" "${stamp}" >&2
    failed=1
  fi
done <<'EOF'
3 20250804_173608
2 20250804_172644
1 20250807_160044
EOF

# Final command sets the script's exit status: non-zero if any run failed.
[ "${failed}" -eq 0 ]