---
# Run identity and data locations.
# NOTE(review): the code that consumes this file is not visible here; the
# per-key notes below are inferred from key names and values — confirm
# against the training script.

# Label for this run (presumably used for logging/tracking — TODO confirm).
project_name: 'mixed_mistral'

# Checkpoint file path, relative. Whether it is written, read, or both is not
# visible here. The "5e-7" in the file name mirrors the learning_rate setting
# in this file; keep the two in sync if either changes.
ckpt_path: 'saved_models/mistral-aligned-5e-7.pt'

# Dataset identifier (looks like a Hugging Face Hub dataset id — TODO confirm).
data_path: 'princeton-nlp/mistral-instruct-ultrafeedback'

# Presumably the number of micro-batches accumulated before each optimizer
# step — TODO confirm against the training loop.
gradient_accumulation: 16

# Presumably the mixed-precision mode passed to the training backend
# ("bf16" conventionally means bfloat16) — TODO confirm accepted values.
mixed_precision: 'bf16'

# Model and tokenizer identifiers. All three keys currently hold the same
# value, so the policy and the reference model resolve to the same identifier.
# NOTE(review): the values look like Hugging Face Hub model ids — confirm how
# the loader resolves them (hub id vs. local directory).

# Model being trained (presumably — inferred from the key name).
policy_model_path: 'mistralai/Mistral-7B-Instruct-v0.2'

# Reference model (presumably kept fixed during training — TODO confirm).
ref_model_path: 'mistralai/Mistral-7B-Instruct-v0.2'

# Tokenizer to load for the policy model.
policy_tokenizer_name: 'mistralai/Mistral-7B-Instruct-v0.2'

# Training flags and hyperparameters.
# NOTE(review): the consuming code is not visible from this file; the per-key
# notes are inferred from key names — confirm against the training script.
# Booleans are written in canonical lowercase form (true/false).

# Presumably enables gradient (activation) checkpointing — TODO confirm.
use_grad_ckpt: true

# Presumably selects on-policy vs. off-policy data — TODO confirm semantics.
on_policy: false

# Presumably the maximum sequence length in tokens — TODO confirm unit.
max_length: 4096

# Batch size (per device or global is not visible here — TODO confirm).
batch_size: 1

# Presumably the max-norm threshold for gradient clipping — TODO confirm.
clip_grad_norm: 1.0

# Presumably the number of learning-rate warmup steps — TODO confirm.
warmup_steps: 500

# Written with an explicit decimal point in the mantissa on purpose: YAML 1.1
# resolvers (e.g. PyYAML's default) load a bare `5e-7` as the *string* "5e-7",
# whereas `5.0e-7` resolves to a float under both YAML 1.1 and YAML 1.2.
learning_rate: 5.0e-7

# Optimizer name (how it is mapped to an implementation is not visible here).
optimizer: AdamW

# Presumably toggles length normalisation in the objective — TODO confirm.
length_norm: true

# Presumably the checkpoint-saving interval, in steps — TODO confirm.
save_per_step: 1000

# Presumably the number of training epochs — TODO confirm.
epoch: 1

# Random seed value.
seed: 888