#!/usr/bin/env bash
#
# Launch H-GRPO training on the unlabeled MedQA dataset through `accelerate`.
#
# Usage: <this script> MODEL_NAME_OR_PATH OUTPUT_PATH
#   MODEL_NAME_OR_PATH  model to start from, e.g.
#                       /workspace/output/medqa_sft/Qwen2.5-1.5B-Instruct/medqa_train_ds_seed
#   OUTPUT_PATH         directory for training outputs, e.g.
#                       /workspace/output/grpo_medqa
#
# The combined stdout/stderr of the run is shown on the terminal and also
# written to OUTPUT_PATH/train.log.
#
# Intended to be executed (not sourced): it enables strict mode and exits
# non-zero on usage errors or when the training command fails.

# -e: stop on unhandled errors; -u: unset variables are errors;
# pipefail: the `accelerate | tee` pipeline fails when accelerate fails,
# instead of always reporting tee's (successful) status.
set -euo pipefail

# Extra positional arguments are tolerated and ignored, as before.
if (( $# < 2 )); then
  printf 'Usage: %s MODEL_NAME_OR_PATH OUTPUT_PATH\n' "${0##*/}" >&2
  exit 2
fi

model_name_or_path=$1
output_path=$2

# Reject empty arguments: an empty OUTPUT_PATH would otherwise make the log
# land in /train.log.
if [[ -z "$model_name_or_path" || -z "$output_path" ]]; then
  printf '%s: MODEL_NAME_OR_PATH and OUTPUT_PATH must be non-empty\n' \
    "${0##*/}" >&2
  exit 2
fi

export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

dataset_name=/workspace/HSIR/data/MedQA/medqa_grpo_unlabeled.json

# The output directory must exist before tee can create the log file in it.
mkdir -p -- "$output_path"

# NOTE(review): 8 GPUs are made visible but only 7 processes are launched;
# presumably one device is left free on purpose — confirm before changing.
# 2>&1 sends stderr through the pipe as well, so train.log captures both streams.
ACCELERATE_LOG_LEVEL=info accelerate launch \
    --config_file /workspace/HSIR/H-GRPO/recipes/zero3.yaml \
    --num_processes=7 /workspace/HSIR/H-GRPO/open_r1/grpo.py \
    --model_name_or_path "$model_name_or_path" \
    --output_dir "$output_path" \
    --dataset_name "$dataset_name" \
    --learning_rate 1e-6 \
    --config /workspace/HSIR/H-GRPO/recipes/H-GRPO_MedQA.yaml 2>&1 \
  | tee "$output_path/train.log"
