# Top-level run settings.
# NOTE(review): the script that consumes this file is not visible here; the
# descriptions below are inferred from the key names - confirm against it.
# Names of the model, dataset and collate function to use (presumably
# resolved by name in the project code).
lightning_model: "DPOModel"
dataset: "DPODataset"
collate_fn: "dpo_collate_fn"
data_in_memory: false
# Data-loading sizes.
batch_size: 1
num_workers: 4
# Both are explicitly null, i.e. no value is set for either path.
load_model_filename: null
resume_training_path: null
# Arguments for the model named in `lightning_model` (presumably passed to its
# constructor - confirm against the training script).
lightning_model_args:
  prompt_length: 8
  max_length: 256
  # NOTE(review): presumably the DPO beta coefficient - confirm in the model.
  beta: 0.5
  do_preference_reg: false
  optimizer: "Adam"
  optimizer_args:
    # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
    # PyYAML read a bare `5e-6` as a string, not a float.
    lr: 5.0e-06
  lr_scheduler: "ReduceLROnPlateau"
  lr_scheduler_args:
    patience: 5
  # Metric name; presumably the quantity the LR scheduler watches - confirm.
  monitor: "val/total_loss"
# Keys match parameters of Lightning's Trainer (presumably forwarded to it as
# keyword arguments - confirm against the training script).
trainer_args:
  accelerator: "cuda"
  log_every_n_steps: 250
  max_epochs: 1000
  # Four device indices, 0 through 3.
  devices: [0, 1, 2, 3]
  strategy: "fsdp"
  enable_progress_bar: true
  inference_mode: false
  precision: "16-mixed"
# Seeding options. The keys match Lightning's seed_everything(seed, workers);
# presumably forwarded there - confirm against the training script.
seed_args:
  seed: 42
  workers: true
# Per-split values for train/val/test; the three sum to 1.0.
# NOTE(review): presumably fractions of the full dataset - confirm against the
# code that performs the split.
dataset_split_args:
  train: 0.8
  val: 0.2
  # 0.0 here: presumably no data is held out for a test split - confirm.
  test: 0.0