---
# Top-level run settings.
# NOTE(review): the meanings below are inferred from the key names only;
# confirm each against the code that loads this file.

# Base model identifier (presumably a Hugging Face hub name — verify).
model_name: 'gpt2-medium'
# Presumably the maximum tokenized sequence length; 1024 matches GPT-2's
# context window — TODO confirm how the loader uses it.
max_length: 1024
# Dataset variant selector; valid values are defined by the consumer.
train_dataset: 'single'
# Label source selector; valid values are defined by the consumer.
annotator: 'gold_rm_preference'
# Directory name for the model — whether it is read from or written to
# cannot be told from this file; verify against the caller.
model_directory: 'gpt2-medium'
# Presumably the number of ensemble members to train — confirm.
n_ensembles: 5
# Presumably the held-out fraction (0.02 = 2%) — confirm which side of the
# split this refers to.
train_test_split: 0.02
# Keyword arguments for the trainer. The key names match the fields of
# Hugging Face `transformers.TrainingArguments` — presumably forwarded
# there unchanged; confirm against the loader.
training_kwargs:
  num_train_epochs: 1

  # Optimizer schedule.
  # Keep both the decimal point and the signed exponent: YAML 1.1 parsers
  # such as PyYAML read `3e-5` as a string, not a float.
  learning_rate: 3.0e-5
  lr_scheduler_type: 'cosine'
  warmup_steps: 20

  # Batching. A per-device batch of 1 accumulated over 64 steps gives 64
  # samples per device per optimizer update (assuming HF Trainer semantics).
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 64

  # Memory and distributed settings.
  gradient_checkpointing: true
  # NOTE(review): combining find_unused_parameters with gradient
  # checkpointing is a known source of DDP errors in some setups — confirm
  # this pairing is intentional.
  ddp_find_unused_parameters: true
  # Keep dataset columns the model's forward signature does not name
  # (presumably needed by a custom collator — verify).
  remove_unused_columns: false
  bf16: true

  # Logging.
  report_to: 'wandb'
  logging_steps: 10

  # Evaluation.
  # NOTE(review): newer transformers releases rename this key to
  # `eval_strategy`; keep it in sync with the pinned library version.
  evaluation_strategy: 'steps'
  eval_steps: 100
  eval_accumulation_steps: 1

  # Checkpointing. `save_steps` is kept equal to `eval_steps` because
  # `load_best_model_at_end` needs checkpoints aligned with evaluations.
  save_strategy: 'steps'
  save_steps: 100
  save_total_limit: 1
  load_best_model_at_end: true
  save_safetensors: false
# LoRA adapter settings. The key names match the fields of PEFT's
# `LoraConfig` — presumably forwarded there unchanged; confirm against the
# loader.
lora_config:
  # Sequence-classification task type.
  task_type: 'SEQ_CLS'
  # false = adapters are configured for training rather than inference.
  inference_mode: false
  # Adapter rank and scaling numerator; under standard LoRA scaling this
  # gives lora_alpha / r = 4 (assuming PEFT defaults — verify).
  r: 8
  lora_alpha: 32
  lora_dropout: 0.1
