# Example launch command for this config:
#   torchrun --nproc_per_node=4 --master_port 18341 v2/train.py
#
# GPU ids for this run, written as one comma-separated string. Keep exactly
# one `gpus` line active; the commented lines are alternative selections.
# NOTE(review): presumably the number of ids listed here should match
# --nproc_per_node in the launch command - confirm against v2/train.py.
# gpus: "0"
# gpus: "4, 5, 6, 7"
gpus: "0, 1, 2, 3"
# gpus: "0, 1, 2, 3, 4, 5, 6, 7"

# Evaluation-only configuration: do_train is false and do_eval is true.
# PEFT is enabled (use_peft: true) with peft_type "vera".
# NOTE(review): presumably the adapter weights are loaded from
# peft_model_path on top of the base model in vlm_model_path - confirm
# against the consuming script.
transformers_args:
  # model args
  # train_from_scratch: true

  # Dataset selection. Keep exactly one data_dir active (a duplicated key is
  # invalid YAML and most parsers silently keep only the last one); the
  # commented lines are alternative datasets.
  # data_dir: ["opencompass_mmbench"]
  # data_dir: ["ocrbench"]
  # data_dir: ["hf_scienceqa"]
  # data_dir: ["opencompass_mathvision"]
  # data_dir: ["hf_coco_caption"]
  # data_dir: ["hf_slakevqa"]
  data_dir: ["hf_visonlyqa"]

  remove_unused_columns: false
  vlm_model_path: "llava-hf/llava-v1.6-mistral-7b-hf"
  # Quoted on purpose: the directory name contains YAML indicator characters
  # ([ ] ' | ^ $ :) that are only safe in an unquoted scalar by accident.
  # The string is a literal on-disk path and is reproduced character for
  # character; do not "correct" the regex-looking fragment inside it.
  peft_model_path: "/datadisk/med-mllm/v2/output/AAA_batch_run_2_conv_neighbor_lora-20250616-18:08:16/llava-mistral-vera-['hf_visonlyqa']-rank32-^language_model.model.layers.d+.mlp.(gate|up|down)_proj$-None-20250616-18:08:44/checkpoint-625"
  exp_name: "eval"
  # Same value as the "rank32" fragment in peft_model_path above.
  lora_rank: 32
  # Set to null: no additional modules need to be trained.
  requires_grad_list: null
  do_train: false
  do_eval: true
  my_lora: false
  use_peft: true
  # Choices: lora, ada-lora, vb-lora, vera, prefix-tuning, prompt-tuning, ia3
  peft_type: "vera"
  per_device_eval_batch_size: 1
  # NOTE(review): the two training knobs below are presumably ignored while
  # do_train is false - confirm before relying on them.
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 4
  eval_accumulation_steps: 1
  # deepspeed: "v2/config/zero3.json"

  

