# [NOTE] Major things to be modified:
# - base_run_name (IMPORTANT: do not append 'from-mnli' to the run name anymore; this is now handled automatically)
# - from_mnli : {true, false}
# - model configurations
# - local_pretrain_checkpoint_folder : Set to the same checkpoint path that was used for pretraining.
# - wandb (project name)

# Whether to run the various GLUE jobs serially or in parallel.
# Use parallel: true to take advantage of multiple GPUs.
parallel: true

# Basic run configuration. Additional details are added to base_run_name for
# each GLUE task and each random seed.
base_run_name: chimera
# One of {true, false}; see the commented-out MNLI checkpoint paths in the
# "Loading" section of this file.
from_mnli: false
precision: amp_bf16
# NOTE(review): presumably the number of runs (seeds) per task — confirm
# against the launcher script.
num_task_run: 5
# NOTE(review): false presumably means "no single task selected" — confirm
# against the launcher script.
task_name: false
max_seq_len: 256

# Tokenizer for dataset creation.
# This value is also interpolated into model.pretrained_model_name and
# model.tokenizer_name.
tokenizer_name: bert-base-uncased

# Model
model:
  name: bert
  use_pretrained: false  # Train the model from scratch. Set to true to start from the HF off-the-shelf weights.
  # Both names below resolve to the top-level tokenizer_name.
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  model_config:
    num_hidden_layers: 23
    # Currently equal to the top-level max_seq_len (256).
    max_position_embeddings: 256
    use_position_embeddings: false
    hidden_size: 768

# Loading
# Fill in the folder before running; the load path below is built from it.
# It is written as an explicit null so the "not yet set" state is unambiguous.
local_pretrain_checkpoint_folder: null  # e.g., ./${base_run_name}
starting_checkpoint_load_path: ${local_pretrain_checkpoint_folder}/latest-rank0.pt

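# When starting from an MNLI checkpoint (the checkpoint filename below is an
# example; use the one actually written by your MNLI run):
# local_pretrain_checkpoint_folder: ./local-finetune-checkpoints/${base_run_name}/task=mnli/seed=19/
# starting_checkpoint_load_path: ${local_pretrain_checkpoint_folder}/ep3-ba24546-rank0.pt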

# Saving
# Fill in the prefix before running; the checkpoint folder below is built from
# it. It is written as an explicit null so the "not yet set" state is unambiguous.
save_finetune_checkpoint_prefix: null  # e.g., ./ (local)
# save_finetune_checkpoint_prefix: s3://<bucket>/remote-finetune-checkpoints # (remote)
save_finetune_checkpoint_folder: ${save_finetune_checkpoint_prefix}/${base_run_name}

# # (Optional) W&B logging
# loggers:
#   wandb:
#     project:      # Fill this in if using W&B
#     entity:      # Fill this in if using W&B

# Callbacks
# Each callback is listed with an empty mapping ({}), i.e. no arguments are
# set for it in this file.
callbacks:
  lr_monitor: {}
  speed_monitor: {}

# Scheduler
scheduler:
  name: linear_decay_with_warmup
  # NOTE(review): "0.06dur" looks like a duration-relative time string
  # (warm up over 6% of training) — confirm against the trainer's docs.
  t_warmup: 0.06dur
  # NOTE(review): presumably the final learning-rate multiplier, i.e. decay
  # to zero — confirm.
  alpha_f: 0.0

# Task configuration
# One entry per GLUE task; each entry carries a trainer_kwargs mapping with the
# task's checkpoint retention, sequence length, learning rate (lr), weight
# decay (wd) and training duration.
#
# lr/wd are written with an explicit decimal point (e.g. 1.0e-4 rather than
# 1e-4): YAML 1.1 loaders such as PyYAML resolve "1e-4" as a string, whereas
# "1.0e-4" is parsed as a float by every loader.
tasks:
  mnli:
    # Specify any extra task-specific arguments for the trainer here
    trainer_kwargs:
      # We keep one MNLI checkpoint locally so that we can start finetuning of
      # RTE, MRPC and STS-B from the MNLI checkpoint
      save_num_checkpoints_to_keep: 1
      max_sequence_length: 256
      lr: 1.0e-4
      wd: 5.0e-6
      max_duration: 2ep
  rte:
    trainer_kwargs:
      save_num_checkpoints_to_keep: 0
      max_sequence_length: 256
      lr: 1.0e-5
      wd: 1.0e-6
      max_duration: 3ep
  qqp:
    trainer_kwargs:
      save_num_checkpoints_to_keep: 0
      max_sequence_length: 256
      lr: 5.0e-5
      wd: 3.0e-6
      max_duration: 3ep
  qnli:
    trainer_kwargs:
      # NOTE(review): qnli keeps one checkpoint, unlike the other non-MNLI
      # tasks in this file — confirm this is intentional.
      save_num_checkpoints_to_keep: 1
      max_sequence_length: 256
      lr: 5.0e-5
      wd: 1.0e-6
      max_duration: 7ep
  sst2:
    trainer_kwargs:
      save_num_checkpoints_to_keep: 0
      max_sequence_length: 256
      lr: 5.0e-5
      wd: 3.0e-6
      max_duration: 2ep
  stsb:
    trainer_kwargs:
      save_num_checkpoints_to_keep: 0
      max_sequence_length: 256
      lr: 3.0e-5
      wd: 3.0e-6
      max_duration: 10ep
  mrpc:
    trainer_kwargs:
      save_num_checkpoints_to_keep: 0
      max_sequence_length: 256
      lr: 8.0e-5
      wd: 8.0e-6
      max_duration: 10ep
  cola:
    trainer_kwargs:
      save_num_checkpoints_to_keep: 0
      max_sequence_length: 256
      lr: 1.0e-4
      wd: 5.0e-6
      max_duration: 10ep
