# Label for this batch of runs.
batch_exp_name: 'AAA_batch_run_2_conv_neighbor_lora'

# Launch command kept for reference:
# torchrun --nproc_per_node=4 --master_port 18341 v2/train.py

# GPU ids as one comma-separated string. Exactly one `gpus` line should be
# active; the alternatives are kept commented out for quick switching.
# gpus: '0'
# gpus: '4, 5, 6, 7'
gpus: '0, 1, 2, 3'
# gpus: '0, 1, 2, 3, 4, 5, 6, 7'

# Baseline training arguments. The `dynamic` and `overrides` sections of this
# file address these keys by dotted path (e.g. `transformers_args.exp_name`).
transformers_args:
  # model args
  # train_from_scratch: true
  # Keep the dot in the mantissa: YAML 1.1 loaders such as PyYAML resolve a
  # bare `5e-4` as the string "5e-4" rather than a float.
  learning_rate: 5.0e-4
  do_eval: true
  # lr_scheduler_type: constant_with_warmup
  # weight_decay: 0.01
  data_dir: ["opencompass_mmbench"]
  remove_unused_columns: false
  vlm_model_path: "llava-hf/llava-v1.6-mistral-7b-hf"
  exp_name: "llava-mistral-core_div_A_neighbor_lora"
  lora_rank: 32
  num_train_epochs: 1.0
  # lora_target_modules is a regex over module names. Keep it in single quotes
  # so that the backslashes are taken literally and not processed as escapes.
  # Alternatives (all linear layers / attention only / q_proj only / down_proj
  # only) are kept commented out around the active line.
  # lora_target_modules: '^language_model\.model\.layers\.\d+\.(self_attn\.[qkvo]_proj|mlp\.(gate|up|down)_proj)$'
  # lora_target_modules: '^language_model\.model\.layers\.\d+\.self_attn\.[qkvo]_proj$'
  # lora_target_modules: '^language_model\.model\.layers\.\d+\.self_attn\.q_proj$'
  lora_target_modules: '^language_model\.model\.layers\.\d+\.mlp\.(gate|up|down)_proj$'
  # lora_target_modules: '^language_model\.model\.layers\.\d+\.mlp\.down_proj$'
  # ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
  requires_grad_list: null   # set to null: no extra modules need training
  # peft_model_path: "v2/output/llava-mistral-my_lora-20250308-11:09:11/checkpoint-459"
  # Defaults describe plain LoRA; entries under `overrides` change
  # my_lora / peft_type / use_peft for specific exp_name values.
  my_lora: false
  use_peft: true
  peft_type: "lora"
  # zero2 a100
  # per_device_eval_batch_size: 1
  # per_device_train_batch_size: 1
  # gradient_accumulation_steps: 8
  # eval_accumulation_steps: 1
  # zero3 a100
  per_device_eval_batch_size: 1
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 4
  eval_accumulation_steps: 1
  # deepspeed: "v2/config/zero3.json"


# Candidate values per dotted config path. Presumably the batch runner starts
# one run per active entry (or per combination of entries) -- confirm against
# the launcher. Uncomment an entry to include it.
dynamic:
  transformers_args.data_dir:
    # - ['opencompass_mmbench']
    # - ['opencompass_mathvision']
    # - ['hf_chartqa']
    # - ['hf_coco_caption']
    # - ['hf_textvqa']
    # - ['hf_ocrvqa']
    # - ['hf_a_okvqa']
    # - ['hf_scienceqa']
    # - ['hf_visonlyqa']
    - ['hf_pathvqa']
    # - ['hf_slakevqa']
  transformers_args.exp_name:
    # - 'llava-mistral-ablation_neighbor_lora_gap_2'
    # - 'llava-mistral-ablation_neighbor_lora'
    # - 'llava-mistral-ablation_masked_lora'
    # - 'llava-mistral-ablation_div_A_neighbor_lora'
    # - 'llava-mistral-core_div_A_neighbor_lora'
    # - 'llava-mistral-core_div_A_neighbor_lora_lr1e-4'
    # - 'llava-mistral-sft'
    # - 'llava-mistral-hydra_lora'
    # - 'llava-mistral-r_lora'
    # - 'llava-mistral-lora'
    # - 'llava-mistral-lora_lr1e-4'
    # - 'llava-mistral-lora_r8'
    # - 'llava-mistral-ada_lora'
    - 'llava-mistral-vb_lora'
    # - 'llava-mistral-vera'
    # - 'llava-mistral-ia3'
    # - 'llava-mistral-dora'

# Per-experiment overrides, keyed first by the config path being matched
# (`transformers_args.exp_name`) and then by the exp_name value. Each entry
# maps dotted config paths to the values used for that experiment.
# Presumably applied on top of `transformers_args` once an exp_name has been
# selected -- confirm against the batch runner.
#
# Learning rates are written with an explicit mantissa dot (`1.0e-4`): YAML 1.1
# loaders such as PyYAML resolve a bare `1e-4` as a string, not a float.
overrides:
  transformers_args.exp_name:
    llava-mistral-core_div_A_neighbor_lora:
      transformers_args.my_lora: true
      transformers_args.peft_type: "my_lora"
    llava-mistral-core_div_A_neighbor_lora_lr1e-4:
      transformers_args.learning_rate: 1.0e-4
      transformers_args.my_lora: true
      transformers_args.peft_type: "my_lora"
    llava-mistral-ablation_masked_lora:
      transformers_args.my_lora: true
      transformers_args.peft_type: "ablation-masked-lora"
      transformers_args.learning_rate: 1.0e-4
    llava-mistral-ablation_neighbor_lora:
      transformers_args.my_lora: true
      transformers_args.peft_type: "ablation-neighbor-lora"
      # transformers_args.data_dir: ["hf_scienceqa"]
    llava-mistral-ablation_neighbor_lora_gap_2:
      transformers_args.my_lora: true
      transformers_args.peft_type: "ablation-neighbor-lora"
      transformers_args.neighbor_gap: 2
    llava-mistral-ablation_div_A_neighbor_lora:
      transformers_args.my_lora: true
      transformers_args.peft_type: "ablation-div-A-neighbor-lora"
      # transformers_args.data_dir: ["hf_textvqa"]
    llava-mistral-hydra_lora:
      transformers_args.my_lora: true
      transformers_args.peft_type: "hydra-lora"
      # transformers_args.learning_rate: 5.0e-8
    llava-mistral-lora:
      transformers_args.my_lora: false
      transformers_args.peft_type: "lora"
    llava-mistral-lora_lr1e-4:
      transformers_args.learning_rate: 1.0e-4
      transformers_args.my_lora: false
      transformers_args.peft_type: "lora"
    llava-mistral-lora_r4:
      transformers_args.lora_rank: 4
      transformers_args.my_lora: false
      transformers_args.peft_type: "lora"
    llava-mistral-lora_r8:
      transformers_args.lora_rank: 8
      transformers_args.my_lora: false
      transformers_args.peft_type: "lora"
    llava-mistral-ada_lora:
      transformers_args.my_lora: false
      transformers_args.peft_type: "ada-lora"
    llava-mistral-vb_lora:
      transformers_args.my_lora: false
      transformers_args.peft_type: "vb-lora"
      # NOTE(review): pins the dataset for this experiment; presumably this
      # wins over the `dynamic` data_dir list -- confirm that is intended.
      transformers_args.data_dir: ["hf_pathvqa"]
    llava-mistral-vera:
      transformers_args.my_lora: false
      transformers_args.peft_type: "vera"
    llava-mistral-ia3:
      transformers_args.my_lora: false
      transformers_args.peft_type: "ia3"
    llava-mistral-dora:
      transformers_args.my_lora: false
      transformers_args.peft_type: "dora"
    llava-mistral-r_lora:
      transformers_args.my_lora: true
      transformers_args.peft_type: "r-lora"
      # NOTE(review): pins the dataset for this experiment; presumably this
      # wins over the `dynamic` data_dir list -- confirm that is intended.
      transformers_args.data_dir: ["hf_slakevqa"]
      # transformers_args.learning_rate: 5.0e-8
    llava-mistral-sft:
      # Full fine-tuning baseline: PEFT is switched off entirely.
      transformers_args.my_lora: false
      transformers_args.use_peft: false
      transformers_args.learning_rate: 1.0e-6
      

  

