---
# Sweep identity: the sweep's display name and the project its runs are
# grouped under (this file appears to be a Weights & Biases sweep config).
name: adam_copa_ft
project: zo_bench_Copa_llama-2-13b
# Command line launched for every trial of the sweep.
# `${interpreter}`, `${program}` and `${args}` are sweep macros; `${args}`
# expands to the swept `parameters` as `--key=value` flags. Every quoted entry
# after the macros is a fixed flag shared by all trials.
command:
  - "${interpreter}"
  - "${program}"
  - "${args}"
  - "--model_name=meta-llama/Llama-2-13b-hf"
  - "--task_name=Copa"
  # NOTE(review): `$TAG` is not one of the sweep macros, so it is presumably
  # passed to the program literally unless the program expands it itself.
  # If an environment variable was intended, `${envvar:TAG}` may be what is
  # wanted -- confirm before changing.
  - "--output_dir=result/Copa-ft-$TAG"
  - "--num_train_epochs=5"
  - "--no_reparam"
  # Single batch-size flag. The file previously carried two: a well-formed
  # `=16` here and a trailing `--per device train batch size=8` whose spaces
  # made it an unparseable argument. The trailing entry would have overridden
  # this one had it been spelled correctly, so 8 is kept.
  # NOTE(review): confirm 8 (not 16) is the intended value.
  - "--per_device_train_batch_size=8"
  - "--load_best_model_at_end"
  - "--evaluation_strategy=epoch"
  - "--save_strategy=epoch"
  - "--save_total_limit=1"
  # NOTE(review): evaluation and saving are both epoch-based above, so these
  # step intervals are presumably unused -- verify against the program's
  # argument handling. `--eval_steps` was listed twice; one copy is kept.
  - "--eval_steps=500"
  - "--save_steps=500"
  # - "--max_steps=20000"
  - "--logging_steps=10"
  - "--num_eval=1000"
  - "--num_train=1000"
  - "--num_dev=100"
  - "--train_as_classification=False"
  - "--perturbation_mode=two_side"
  - "--trainer=regular"
  - "--optimizer=adam"
  - "--train_set_seed=0"
  - "--lr_scheduler_type=constant"
  # NOTE(review): `--load_bfloat16` and `--fp16` are both set; check that
  # mixing a bfloat16-loaded model with fp16 training is intentional.
  - "--load_bfloat16"
  - "--save_model"
  - "--fp16"
# Search strategy: enumerate the value lists given under `parameters`.
method: grid
# Objective tracked for the sweep: a higher `test_acc` is better.
metric:
  name: test_acc
  goal: maximize
# Swept hyperparameters; each is forwarded to the program through `${args}`.
# Floats are written with an explicit mantissa dot (`1.0e-6`, not `1e-6`):
# YAML 1.1 loaders such as PyYAML resolve the dot-less form as a string
# rather than a number.
parameters:
  learning_rate:
    values:
      # - 1.0e-4
      # - 1.0e-5
      - 1.0e-6
      - 1.0e-7
      # - 1.0e-8
  # NOTE(review): the command fixes `--trainer=regular` and `--optimizer=adam`;
  # presumably `zo_eps` only matters for zeroth-order trainers -- confirm
  # whether this axis is needed here.
  zo_eps:
    values:
      - 1.0e-3
      # - 1.0e-4
      # - 1.0e-5
  weight_decay:
    values:
      - 0

# Entry-point script; substituted for the `${program}` entry in `command`.
program: run.py