#!/bin/bash
# Runtime environment for the training processes started further down.

# GPUs made visible to CUDA for this run (device indices 0 through 5).
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5

# Option string consumed by PyTorch's CUDA caching allocator.
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Module search path: train/stage_rl under the directory this script is
# launched from. Note this replaces any PYTHONPATH already in the environment.
PYTHONPATH="$(pwd)/train/stage_rl"

# Turn off Weights & Biases reporting.
WANDB_MODE=disabled

export CUDA_VISIBLE_DEVICES PYTORCH_CUDA_ALLOC_CONF PYTHONPATH WANDB_MODE

# Run configuration. The path values are placeholders and must be replaced
# with real locations before running. Each value is forwarded to grpo.py by
# the torchrun command at the end of this script.

# Task identifier (--task_name) and dataset location (--dataset_name).
TASK_NAME=vqa-rad-zero
DATASET_NAME=/path/to/dataset/json/

# Image location (--image_path).
IMAGE_PATH=/path/to/imgs/files/

# Model name or path (--model_name_or_path).
MODEL_NAME_OR_PATH=/your/path/to/model

# Output directory (--output_dir); reward.log and train.log are also placed
# under it.
OUTPUT_DIR=/your/path/to/output/model

export TASK_NAME DATASET_NAME IMAGE_PATH MODEL_NAME_OR_PATH OUTPUT_DIR


# Make sure the output directory exists before anything writes into it
# (reward.log and train.log are both placed under OUTPUT_DIR).
# `-p` creates any missing parent directories and succeeds when the directory
# is already there, so no separate existence test is needed. `--` protects
# against a path that begins with a dash.
# Abort on failure: without the directory neither the logs nor the model
# output can be written, so starting training would be pointless.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  printf 'error: cannot create output directory: %s\n' "$OUTPUT_DIR" >&2
  exit 1
fi

# Debug settings exported to the training processes.
# NOTE(review): presumably the trainer writes reward debugging output to
# LOG_PATH when DEBUG_MODE is "True" -- confirm against the code in
# train/stage_rl.
DEBUG_MODE="True"
LOG_PATH="${OUTPUT_DIR}/reward.log"
export DEBUG_MODE LOG_PATH

# Training

# Launch train/stage_rl/grpo.py under torchrun: a single node with 5 worker
# processes, using the DeepSpeed config in scripts/train/zero3.json. Combined
# stdout/stderr is shown on the terminal and copied to
# ${OUTPUT_DIR}/train.log via tee.
#
# NOTE(review): CUDA_VISIBLE_DEVICES exposes 6 GPUs while only 5 processes are
# started; presumably the remaining GPU is reserved for vLLM generation
# (--use_vllm_for_gen true) -- confirm before changing either number.
#
# All variable expansions are quoted so that paths containing spaces or glob
# characters reach grpo.py as single arguments.
#
# pipefail makes the pipeline (and, since it is the last command, the script)
# return torchrun's failure status instead of tee's, which is almost always 0.
set -o pipefail

torchrun --nproc_per_node=5 --nnodes=1 --master-port=29516 \
  train/stage_rl/grpo.py \
  --deepspeed scripts/train/zero3.json \
  --output_dir "${OUTPUT_DIR}" \
  --model_name_or_path "${MODEL_NAME_OR_PATH}" \
  --dataset_name "${DATASET_NAME}" \
  --image_path "${IMAGE_PATH}" \
  --task_name "${TASK_NAME}" \
  --use_vllm_for_gen true \
  --use_system_prompt false \
  --max_prompt_length 4096 \
  --max_completion_length 512 \
  --num_generations 4 \
  --per_device_train_batch_size 1 \
  --gradient_accumulation_steps 2 \
  --logging_steps 1 \
  --bf16 \
  --gradient_checkpointing true \
  --attn_implementation flash_attention_2 \
  --max_pixels 480000 \
  --save_steps 600 \
  --num_train_epochs 1 \
  --save_only_model \
  --resume_from_checkpoint true \
  2>&1 | tee "${OUTPUT_DIR}/train.log"
