# Runs two inference scripts from the TRPO-v4 checkout inside the
# `verl-grpo-baseline` conda environment, then the training script from the
# criticrl-v2 checkout inside the `criticrl-v2` conda environment.
#
# Each group of jobs is gated on its own `conda activate` succeeding, so a
# failed activation can no longer cause the jobs to run silently in whatever
# environment happened to be active before.
#
# NOTE(review): `conda activate` needs conda's shell integration to be loaded
# in the shell that runs this file -- confirm how this file is invoked
# (sourced from an initialized shell vs. `bash <file>`).
# NOTE(review): the `***` path component is left unquoted, so it is subject to
# pathname expansion; presumably it stands in for a single real directory --
# confirm.

# Stage 1: inference jobs (verl-grpo-baseline environment).
if conda activate verl-grpo-baseline; then
  bash /hpc2hdd/home/zyang398/***/projs/criticRL/TRPO-v4/scripts/deepscaler/inference/qwen1.5b_math_3k.sh
  bash /hpc2hdd/home/zyang398/***/projs/criticRL/TRPO-v4/scripts/deepscaler/inference/qwen1.5b_deepscaler_cot_3k.sh
else
  printf 'error: could not activate conda env "%s"; skipping inference scripts\n' \
    'verl-grpo-baseline' >&2
fi

# Stage 2: training job (criticrl-v2 environment). Still attempted regardless
# of how the inference scripts exited, as in the original sequence.
if conda activate criticrl-v2; then
  bash /hpc2hdd/home/zyang398/***/projs/criticRL/criticrl-v2/scripts/deepscaler/train/qwen1.5b_deepscaler_cot_3k.sh
else
  printf 'error: could not activate conda env "%s"; skipping training script\n' \
    'criticrl-v2' >&2
  # Leave a non-zero status as the final result so callers can detect the skip.
  false
fi