# Launch with:
#   torchrun --nproc_per_node=4 --master_port 18340 v2/train.py
# Single-GPU alternative (swap with the active `gpus` line below):
# gpus: "0"
# GPU indices as one comma-separated string (a scalar, not a YAML list).
# NOTE(review): the consumer presumably splits this on commas — confirm.
gpus: '0, 1, 2, 3'

transformers_args:
  # ---- model / experiment ----
  # train_from_scratch: true
  remove_unused_columns: false
  peft_model_path: 'llava-hf/llava-v1.6-mistral-7b-hf'
  exp_name: 'llava-mistral-hook'
  hook_model: true
  # NOTE(review): presumably the module-name fragments whose parameters stay
  # trainable (everything else frozen) — confirm against the training script.
  requires_grad_list:
    - 'multi_modal_projector'
    - 'language_model.lm_head'
    - 'emb_prob_block'

  # ---- batch sizing ----
  # Inactive profile, labelled "zero2 a100"; it differs from the active
  # profile below only in eval_accumulation_steps (1 instead of 8).
  # per_device_eval_batch_size: 1
  # per_device_train_batch_size: 1
  # gradient_accumulation_steps: 8
  # eval_accumulation_steps: 1
  # Active profile, labelled "zero3 a100".
  per_device_eval_batch_size: 1
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 8
  eval_accumulation_steps: 8

  

