# Example launch command (4 processes, matching the 4 GPU ids listed below):
#   torchrun --nproc_per_node=4 --master_port 18343 v2/train.py
#
# GPU selection, written as a comma-separated string of device ids.
# NOTE(review): presumably consumed by the launcher/training script to pick
# visible devices -- confirm against the code that reads this key.
# Alternatives kept for quick switching:
# gpus: "0"
# gpus: "0, 1, 2, 3"
gpus: '0,1,2,3'

transformers_args:
  # ---- Model and data -------------------------------------------------
  # train_from_scratch: true
  data_dir:
    - 'hf_pathvqa'
  remove_unused_columns: false
  vlm_model_path: 'llava-hf/llava-v1.6-mistral-7b-hf'
  # Kept double-quoted: the path itself contains single quotes.
  peft_model_path: "/datadisk/med-mllm/v2/output/AAA_batch_run_2_conv_neighbor_lora-20250617-11:14:23/llava-mistral-sft-['hf_pathvqa']-rank32-^language_model.model.layers.d+.mlp.(gate|up|down)_proj$-None-20250617-14:05:21/checkpoint-625"

  # ---- Experiment name and adapter switches ---------------------------
  # NOTE(review): use_peft is false while peft_model_path and peft_type are
  # still populated -- confirm whether the checkpoint above is loaded in
  # this configuration.
  exp_name: 'eval'
  use_peft: false
  my_lora: false
  peft_type: 'vb-lora'

  # ---- Run mode: evaluation only (do_train off, do_eval on) -----------
  # deepspeed: null
  do_train: false
  do_eval: true
  # Explicit empty list (a bare `requires_grad_list:` would parse as null).
  requires_grad_list: []

  # ---- Batch sizes / accumulation -------------------------------------
  # Preset labelled "zero2 a100" (disabled):
  # per_device_eval_batch_size: 1
  # per_device_train_batch_size: 1
  # gradient_accumulation_steps: 8
  # eval_accumulation_steps: 1
  # Preset labelled "zero3 a100" (active; same values as the one above):
  per_device_eval_batch_size: 1
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 8
  eval_accumulation_steps: 1

  


