#!/bin/bash
#
# Launches `python3 -m verl.trainer.main` with examples/config.yaml as the base
# config and a set of key=value overrides given on the command line (data
# files, actor model path and clip ratios, KL switch, experiment name and
# per-node GPU count).
#
# The config path is relative, so this is presumably meant to be run from the
# repository root — TODO confirm.
#
# NOTE(review): data.val_files uses the same "@train" suffix as
# data.train_files. It is kept as-is here; confirm that this is the intended
# selector for the validation parquet file.

# Abort on errors, unset variables and failed pipeline stages.
set -euo pipefail
# Echo every command before it runs so the exact launch line ends up in the log.
set -x

MODEL_PATH=/mnt/r-contentsecurity-p/common/checkpoints/duanxian/Qwen3-VL-8B-Instruct  # replace it with your local file path

# CUDA_VISIBLE_DEVICES lists 12 device indices (0-11), matching
# trainer.n_gpus_per_node=12 below; keep the two in sync when changing either.
# MODEL_PATH is quoted so a replacement path containing spaces or glob
# characters is still passed as a single argument.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7,8,9,10,11 \
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=/ossfs/workspace/data/train/train-00000-of-00001.parquet@train \
    data.val_files=/ossfs/workspace/data/val/test-00000-of-00001.parquet@train \
    data.mini_rollout_batch_size=96 \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.actor.clip_ratio_low=0.2 \
    worker.actor.clip_ratio_high=0.28 \
    algorithm.disable_kl=True \
    trainer.experiment_name=qwen3_vl_8b_geo_dapo1 \
    trainer.n_gpus_per_node=12
