# Example launch command; its --nproc_per_node=4 matches the four GPU ids
# listed in the active `gpus` value below:
# torchrun --nproc_per_node=4 --master_port 18341 v2/train.py

# GPU ids as a comma-separated string. Keep exactly one `gpus` line active;
# the commented lines are alternative selections.
# NOTE(review): presumably read by the training script to choose the visible
# devices - confirm in v2/train.py.
# gpus: "0"
# gpus: "4, 5, 6, 7"
gpus: "0, 1, 2, 3"
# gpus: "0, 1, 2, 3, 4, 5, 6, 7"

# Arguments for the training run.
# NOTE(review): several keys (remove_unused_columns, per_device_*_batch_size,
# *_accumulation_steps, deepspeed) share names with Hugging Face
# `TrainingArguments` fields; the rest look project-specific. Confirm how the
# training script parses this mapping.
transformers_args:
  # model args
  # train_from_scratch: true

  # Dataset selection: keep exactly one `data_dir` line active; the commented
  # lines are alternative datasets.
  # data_dir: ["opencompass_mmbench"]
  # data_dir: ["ocrbench"]
  data_dir: ["slake_vqa"]
  # data_dir: ["opencompass_mathvision"]
  # data_dir: ["hf_coco_caption"]

  remove_unused_columns: false
  vlm_model_path: "llava-hf/llava-v1.6-mistral-7b-hf"
  exp_name: "llava-mistral-ada_lora"
  lora_rank: 4

  # Regex pattern for the LoRA target modules. The pattern must stay in single
  # quotes so that the backslashes are not processed as escape sequences.
  # Active pattern: the q/k/v/o self-attention projections of every
  # `language_model.model.layers.<n>` layer.
  # Commented alternatives: attention + MLP projections (line above the active
  # one) and MLP (gate/up/down) projections only (line below it).
  # lora_target_modules: '^language_model\.model\.layers\.\d+\.(self_attn\.[qkvo]_proj|mlp\.(gate|up|down)_proj)$'
  lora_target_modules: '^language_model\.model\.layers\.\d+\.self_attn\.[qkvo]_proj$'
  # lora_target_modules: '^language_model\.model\.layers\.\d+\.mlp\.(gate|up|down)_proj$'
  # NOTE(review): presumably the same targets written as a plain list of
  # module names - confirm whether the loader accepts this form.
  # ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
  requires_grad_list: null   # set to null: no additional modules need to be trained
  # Commented out: path to an existing adapter checkpoint.
  # NOTE(review): presumably used to resume from / load a trained adapter - confirm.
  # peft_model_path: "v2/output/llava-mistral-my_lora-20250308-11:09:11/checkpoint-459"
  # NOTE(review): presumably toggles a project-specific LoRA implementation
  # (see the "my_lora" checkpoint name above) - confirm.
  my_lora: false
  peft_type: "ada-lora"     ## options: lora, ada-lora, vb-lora, vera, prefix-tuning, prompt-tuning, ia3

  # Batch-size presets. Only one preset should be active at a time.
  # zero2 a100
  # per_device_eval_batch_size: 1
  # per_device_train_batch_size: 1
  # gradient_accumulation_steps: 8
  # eval_accumulation_steps: 1
  # zero3 a100 (active preset)
  # NOTE(review): with 4 processes this gives an effective train batch of
  # 1 x 4 x 4 = 16, assuming standard Hugging Face Trainer semantics - confirm.
  per_device_eval_batch_size: 1
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 4
  eval_accumulation_steps: 1
  # NOTE(review): the active preset is labelled "zero3", but the DeepSpeed
  # config below is commented out - confirm that running without it is intended.
  # deepspeed: "v2/config/zero3.json"

  

