
# Launch GRPO training (grpo_train.py) through torchrun.
#
# Every path below is relative, so run this script from the directory that
# contains grpo_train.py, ./CKPTS, ./ckpts and ./util.

# Handed to grpo_train.py as --llama_path.
export CKPT_PATH=./CKPTS/Llama-3.2-1B-Instruct/original
# Handed to grpo_train.py as --output_dir.
export SAVE_PATH=./ckpts/GRPO_8331_raw_data_llama3.2_1B_fusion
# NOTE(review): four GPU ids are exposed here, but --nproc_per_node below is 1,
# so only one worker process is requested -- confirm this is intentional.
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Single-node, single-process launch on the loopback rendezvous address.
# Variable expansions are quoted so that a path containing spaces or glob
# characters reaches grpo_train.py as exactly one argument. The last argument
# deliberately has no trailing backslash, so the command ends here and lines
# appended later cannot be absorbed into it.
torchrun --nproc_per_node="1" \
    --nnodes="1" \
    --node_rank="0" \
    --master_addr="127.0.0.1" \
    --master_port="12345" \
    grpo_train.py \
    --output_dir "${SAVE_PATH}" \
    --llama_path "${CKPT_PATH}" \
    --model_ckpt ./ckpts/retrain_llama3.2_1B_8331_raw_data/checkpoint_34.pth \
    --vit_path ./CKPTS/vit-base-patch16-224 \
    --per_device_train_batch_size 2 \
    --deepspeed ./util/zero2.json \
    --gradient_accumulation_steps 2 \
    --logging_steps 1 \
    --report_to wandb \
    --gradient_checkpointing false \
    --num_train_epochs 1 \
    --run_name llama3-1B_GRPO \
    --save_steps 500 \
    --save_only_model True \
    --save_safetensors False \
    --num_generations 2 \
    --max_prompt_length=256 \
    --max_completion_length=600 \
    --log_on_each_node=False
