---
# Run-level settings for the experiment named by exp_name.
seed: 1
exp_name: dummy
# Dataset identifiers for this run; how each name is resolved is up to the
# consuming code (not shown in this file).
datasets:
  - shp
  - hh
  - oasst
mode: train
debug: false
# FSDP toggle and its port. Presumably the port is used for distributed
# worker rendezvous — confirm against the launcher.
use_fsdp: true
fsdp_port: 41939
# Experiment-tracking (wandb) settings. Tracking is disabled in this config
# and no entity is set.
wandb:
  enabled: false
  entity: null
  project: archangel
# Filesystem locations. local_run_dir repeats cache_dir joined with exp_name
# as a literal path, so keep them in sync when either one changes.
cache_dir: /data1/models
local_run_dir: /data1/models/dummy
do_first_eval: false
minimum_log_interval_secs: 1.0
intermediate_checkpoints: true
# NOTE(review): loss.trainer names a different trainer class; confirm which
# of the two keys the training code actually reads.
trainer: BasicTrainer
lr: 5.0e-07
# n_epochs is null while n_examples is set — presumably the run length is
# bounded by example count rather than epochs; confirm against the trainer.
n_epochs: null
n_examples: 32
optimizer: RMSprop
# NOTE(review): warmup_steps (150) is larger than n_examples (32). If every
# step consumes at least one example, warmup cannot complete in this run —
# confirm this is intended.
warmup_steps: 150
eval_every: 32
n_samples: 128
samples_dir: samples/
n_eval_examples: 512
# Literal path under local_run_dir; update it together with local_run_dir.
saved_policy: /data1/models/dummy/LATEST/policy.pt
top_p: 0.95
# Role markers, presumably placed around each human / assistant turn when
# prompts are built — confirm against the data loader. Each prefix is a
# newline, the role tag, then a newline; both suffixes are empty strings.
# Double quotes are used so the newlines are explicit \n escapes.
human_prefix: "\n<|user|>\n"
assistant_prefix: "\n<|assistant|>\n"
human_suffix: ""
assistant_suffix: ""
# Both fractions are set to 1.0. Presumably the share of unique desirable /
# undesirable examples kept by the data loader — confirm against the consumer.
frac_unique_desirable: 1.0
frac_unique_undesirable: 1.0
# Model settings: checkpoint identifier, dtypes, gradient clipping, sequence
# length limits and batch sizes.
model:
  name_or_path: ContextualAI/archangel_sft_pythia1-4b
  # null here — presumably the tokenizer falls back to name_or_path; confirm
  # in the loading code.
  tokenizer_name_or_path: null
  load_from: null
  # Name of a layer class; presumably used for FSDP wrapping and/or
  # activation checkpointing — confirm against the training code.
  block_name: GPTNeoXLayer
  policy_dtype: bfloat16
  fsdp_policy_mp: null
  reference_dtype: bfloat16
  # Two separate gradient-norm limits; judging by the name, the second
  # applies to a value head — confirm.
  max_grad_norm: 10.0
  v_head_max_grad_norm: 0.1
  # max_prompt_length is half of max_length.
  max_length: 2048
  max_prompt_length: 1024
  activation_checkpointing: true
  batch_size: 32
  gradient_accumulation_steps: 1
  eval_batch_size: 8
  use_flash_attention: false
# Loss settings.
# NOTE(review): name is "ppo" while trainer and dataloader name DPO / paired
# preference classes, and the top-level trainer key is BasicTrainer. Confirm
# this combination is intentional before reusing this config.
loss:
  name: ppo
  beta: 0.1
  critic_coef: 0.5
  trainer: DPOTrainer
  dataloader: PairedPreferenceDataLoader
  use_reference_model: true
