#!/bin/bash
# Launches `python3 -m verl.trainer.main` with config.yaml plus the
# command-line overrides collected in trainer_args below.
#
# config.yaml, ./model/reward.py and the model/checkpoint directories are all
# given as relative paths, so this is presumably meant to be started from the
# project root -- confirm against how the trainer resolves paths.
#
# The experiment name and the checkpoint sub-directory are both derived from
# this script's own file name (with the .sh suffix removed).

# Four device IDs are exposed; trainer.n_gpus_per_node below is set to match.
export CUDA_VISIBLE_DEVICES=4,5,6,7

# Trace every command as it runs.
set -x
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline, instead of launching with half-initialised values.
set -euo pipefail

# Make Python write stdout/stderr unbuffered so log lines appear immediately.
export PYTHONUNBUFFERED=1

# Replace with the local path of your model checkpoint.
MODEL_PATH=pretrained_models/Qwen2.5-VL-7B-Instruct
RUN_NAME=$(basename "$0" .sh)

# Overrides are kept in an array so that every element reaches the trainer as
# exactly one argument, even if MODEL_PATH or RUN_NAME contain whitespace or
# glob characters.
trainer_args=(
    config=config.yaml

    # Data: training set only; validation files are explicitly set to None.
    data.train_files=MMMU/MMMU@validation
    data.val_files=None
    data.insert_ground_truth=True

    # Actor model and clipping bounds.
    "worker.actor.model.model_path=${MODEL_PATH}"
    worker.actor.clip_ratio_low=0.2
    worker.actor.clip_ratio_high=0.28

    # Reward function, given as <file>:<function name>.
    worker.reward.reward_function=./model/reward.py:compute_score

    # Algorithm switches.
    algorithm.disable_kl=True
    algorithm.online_filtering=False

    # Trainer bookkeeping.
    "trainer.experiment_name=${RUN_NAME}"
    # Must equal the number of IDs listed in CUDA_VISIBLE_DEVICES above.
    trainer.n_gpus_per_node=4
    trainer.save_freq=200
    # NOTE(review): the directory is named "scienceqa_checkpoint" although the
    # training data above is MMMU -- confirm this is intentional.
    "trainer.save_checkpoint_path=scienceqa_checkpoint/${RUN_NAME}"
)

python3 -m verl.trainer.main "${trainer_args[@]}"