# Run 1: launch train_PRM_preference.py with only CUDA device 7 visible to
# the process. The run passes --is_lora plus a --lora_checkpoint pointing at
# checkpoint-12348 inside the "pref_0.1_to_0.3_CL2_sameques_aug" directory and
# writes its results to the "pref_0.0_to_0.1_CL3_sameques_aug" directory.
# NOTE(review): presumably this continues a curriculum from the CL2 stage
# into CL3 -- confirm against train_PRM_preference.py.
# NOTE(review): --dataset_path names "split_4_CL0_sameques_aug" while the
# output directory is labelled CL3 -- verify this is the intended split.
CUDA_VISIBLE_DEVICES=7 \
  python train_PRM_preference.py \
    --model_name Qwen/Qwen2.5-Math-PRM-7B \
    --cache_dir ./cache \
    --dataset_path ./curriculum_learning/split_4_CL0_sameques_aug/ \
    --output_dir curriculum_learning/Qwen2.5-Math-PRM-7B-pref_0.0_to_0.1_CL3_sameques_aug \
    --is_lora \
    --lora_checkpoint ./curriculum_learning/Qwen2.5-Math-PRM-7B-pref_0.1_to_0.3_CL2_sameques_aug/checkpoint-12348/


# Run 2: launch train_PRM_preference.py with only CUDA device 0 visible to
# the process, reading the dataset from ./curriculum_learning/samestep_aug/
# and writing to the "pref_samestep_sameques_aug" output directory.
# Unlike the LoRA-style invocation, neither --is_lora nor --lora_checkpoint
# is passed here.
# NOTE(review): presumably this trains from the base model without LoRA --
# confirm against the script's argument defaults.
CUDA_VISIBLE_DEVICES=0 \
  python train_PRM_preference.py \
    --model_name Qwen/Qwen2.5-Math-PRM-7B \
    --cache_dir ./cache \
    --dataset_path ./curriculum_learning/samestep_aug/ \
    --output_dir curriculum_learning/Qwen2.5-Math-PRM-7B-pref_samestep_sameques_aug

