---
# Configuration for a single training run.
# NOTE(review): the per-key notes below are inferred from key names and
# values only; the consuming script is not visible here — confirm against it.

# Name of the project/run (presumably used for experiment tracking — confirm).
project_name: mixed_gemma

# Checkpoint file path. The file name embeds "1e-6", matching learning_rate
# below; keep the two in sync if the learning rate changes.
ckpt_path: saved_models/gemma-aligned-new-1e-6.pt

# Dataset identifier (looks like a Hugging Face Hub dataset id — confirm).
data_path: princeton-nlp/gemma2-ultrafeedback-armorm

# Presumably the number of micro-batches accumulated per optimizer step.
gradient_accumulation: 16

mixed_precision: bf16

# Policy and reference models point at the same identifier.
policy_model_path: google/gemma-2-9b-it

ref_model_path: google/gemma-2-9b-it

policy_tokenizer_name: google/gemma-2-9b-it

# Presumably toggles gradient (activation) checkpointing — confirm.
use_grad_ckpt: true

on_policy: false

# Presumably the maximum sequence length in tokens — TODO confirm the unit.
max_length: 3000

batch_size: 1

clip_grad_norm: 1.0

warmup_steps: 500

# Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `1e-6` as the string "1e-6", not as a float.
learning_rate: 1.0e-6

optimizer: AdamW

length_norm: true

# Presumably the checkpoint-saving interval in optimizer steps — confirm.
save_per_step: 1000

epoch: 1

seed: 666
