# Default values for the ${NAME} references used by the job template in this
# file (sku, job name, SLA tier and priority).
env_defaults:
  NODES: 4
  GPUS: 8
  MEM: 32
  # One of: Premium, Standard, Basic
  SLA: 'Premium'
  PRIORITY: 'high'

# Search definition: one job is rendered from `job_template` for each
# combination of the parameters listed under `params`.
search:
  job_template:
    # `${...}` references are defined in `env_defaults`; `{...}` placeholders
    # are the names of the search parameters (bsz, grad_accu, lr).
    # NOTE(review): the name has no `{ep}` placeholder although `ep` is also a
    # search parameter, so trials that differ only in `ep` render the same
    # name -- confirm this is acceptable to the submission tool.
    name: 'dpo_unkVQA_qwen_${NODES}x${GPUS}GPU_bsz{bsz}_gacc{grad_accu}_lr{lr}'
    sku: '${NODES}x${MEM}G${GPUS}-IB'
    sla_tier: '${SLA}'
    priority: '${PRIORITY}'
    # mpi: False
    # Single training command. The folded block scalar (`>-`) joins the lines
    # below with one space each and drops the final newline, so the value is
    # one long command line -- the same string the previous multi-line plain
    # scalar produced, without relying on implicit folding or trailing spaces.
    # `{ep}`, `{bsz}`, `{grad_accu}` and `{lr}` are search-parameter
    # placeholders.
    # NOTE(review): <DATA_FOLDER> and <OUTPUT_FOLDER> look like placeholders
    # that must be replaced with real paths before submitting -- confirm.
    # NOTE(review): --output_dir is keyed on `{ep}` only; trials that differ
    # in lr / bsz / grad_accu would write to the same directory -- confirm.
    command:
      - >-
        python run_dpo.py
        --deepspeed ./dpo_config/deepspeed/zero3.json
        --model_name_or_path models/Qwen/Qwen-VL-Chat
        --data_path <DATA_FOLDER>/MMInstruction/vlfeedback_80k.jsonl
        --image_folder <DATA_FOLDER>/MMInstruction/merged_images
        --fp16 True
        --bf16 False
        --tf32 False
        --output_dir <OUTPUT_FOLDER>/qwen-vl/dpo_silkie_ep{ep}
        --num_train_epochs {ep}
        --per_device_train_batch_size {bsz}
        --per_device_eval_batch_size  {bsz}
        --gradient_accumulation_steps {grad_accu}
        --evaluation_strategy "steps"
        --eval_steps 500
        --save_strategy "steps"
        --save_steps 500
        --save_total_limit 10
        --learning_rate {lr}
        --weight_decay 0.05
        --warmup_ratio 0.1
        --adam_beta2 0.98
        --lr_scheduler_type "cosine"
        --logging_steps 10
        --model_max_length 2048
        --gradient_checkpointing True
        --use_lora True
        --report_to wandb
        --logging_first_step True
        --remove_unused_columns False
    # NOTE(review): hard-coded to 8, the same value as the GPUS default in
    # `env_defaults`; keep the two in sync if GPUS is ever overridden.
    process_count_per_node: 8
    submit_args:
      # Environment variables for the job. Numeric values are quoted so they
      # stay strings regardless of how the YAML parser types them.
      env:
        NCCL_IB_DISABLE: '0'
        NCCL_DEBUG: INFO
        # NOTE(review): the NCCL documentation lists 1-22 as the accepted
        # range for NCCL_IB_TIMEOUT -- confirm that 60 is intended.
        NCCL_IB_TIMEOUT: '60'
        NCCL_ASYNC_ERROR_HANDLING: '0'
        MKL_THREADING_LAYER: GNU
      # NOTE(review): this key used to be nested inside `env`, where it was
      # just one more environment variable (the only lower-case one). It is
      # placed here on the assumption that it is a submit option limiting
      # the number of attempts -- confirm against the submission tool's
      # schema.
      max_attempts: 1
  # Grid search over the discrete value lists below. Every parameter has a
  # single value right now, so there is only one combination, which is below
  # the `max_trials` setting of 10.
  type: grid
  max_trials: 10
  params:
    # Each `name` matches a `{name}` placeholder used in `job_template`.
    - name: lr
      spec: discrete
      values:
        # Left unquoted on purpose: whether `1e-5` loads as a string or a
        # float depends on the YAML parser, and that changes how `{lr}`
        # renders. Do not respell it without checking the rendered job name.
        - 1e-5
    - name: grad_accu
      spec: discrete
      values:
        - 4
    - name: bsz
      spec: discrete
      values:
        - 1
    - name: ep
      spec: discrete
      values:
        - 1
      # alternative: values: [2, 3]
