#!/bin/bash
#
# Launches `python3 -m verl.trainer.main` using examples/perception_config.yaml
# and a set of key=value overrides given on the command line (data files,
# model path, clip ratios, rollout token budget, GPU count, epochs, and the
# checkpoint paths to save to / load from).
#
# The config path is relative, so this presumably has to be run from the
# directory that contains `examples/` -- confirm against your checkout.

# Abort on command failure, on use of an unset variable, and on a failure in
# any pipeline stage, so a typo in a variable name cannot silently become an
# empty path.
set -euo pipefail

# Trace every command so the fully expanded launch line is visible in the logs.
set -x

readonly MODEL_PATH=/mnt/r-contentsecurity-p/common/checkpoints/duanxian/Qwen3-VL-8B-Instruct  # replace it with your local file path
readonly TRAIN_PARQUET=/mnt/r-contentsecurity-p/common/datas_yl/langdao/data/v2/rl/merged_data.parquet
# Passed as data.val_files below.
readonly VSTAR_TEST_PARQUET=/mnt/r-contentsecurity-p/common/datas_yl/langdao/data/v2/rl/merged_val.parquet

# The device list has 12 entries, matching trainer.n_gpus_per_node=12 below;
# keep the two in sync when editing either.
# NOTE(review): index 4 is absent from the list while 12 is present -- confirm
# that skipping device 4 is intentional.
#
# Expansions are quoted so a path containing whitespace or glob characters is
# still passed as a single argument.
CUDA_VISIBLE_DEVICES=0,1,2,3,5,6,7,8,9,10,11,12 \
python3 -m verl.trainer.main \
    config=examples/perception_config.yaml \
    data.train_files="${TRAIN_PARQUET}" \
    data.val_files="${VSTAR_TEST_PARQUET}" \
    data.mini_rollout_batch_size=96 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.actor.clip_ratio_low=0.2 \
    worker.actor.clip_ratio_high=0.3 \
    worker.rollout.max_num_batched_tokens=22528 \
    algorithm.disable_kl=True \
    trainer.experiment_name=qwen3_vl_8b_perception_dapo_debug \
    trainer.save_checkpoint_path=/mnt/r-contentsecurity-p/common/datas_yl/langdao/verl_exp/qwen3_vl_8b_perception_dapo_debug \
    trainer.n_gpus_per_node=12 \
    trainer.total_epochs=5 \
    trainer.load_checkpoint_path=/mnt/r-contentsecurity-p/common/datas_yl/langdao/verl_exp/qwen3_vl_8b_perception_dapo/global_step_60
