# Name of this configuration.
name: "aim"

# Training length.
# NOTE(review): -1 presumably means "no step limit" - confirm with consumer.
max_epochs: 1
max_steps: -1

# Gradient accumulation and clipping.
# NOTE(review): gradient_accumulation_steps and accumulate_grad_batches look
# like two names for the same setting; confirm which one is actually read and
# keep them in sync.
gradient_accumulation_steps: 1
accumulate_grad_batches: 1
gradient_clip_val: 1.0
gradient_clip_algorithm: "norm"

# Batch collation.
collator: "DataCollatorForAIM"
pad_to_multiple_of: 8

# Execution strategy and numeric precision.
strategy: "auto"
precision: "bf16-mixed"

# Logging, checkpointing and validation intervals (presumably in steps).
log_every_n_steps: 50
save_every_n_steps: 10000
val_check_interval: 10000

# Training objective.
loss: "mse"

# Selects the AIM implementation variant.
aim_impl: "all_words_last_token"

# NOTE(review): presumably toggles alignment on every layer instead of a
# subset - confirm against the code that reads this flag.
align_all_layers: false

# Learning rate.
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `1e-4` as the string "1e-4" rather than a float.
# TODO: move this setting into the optimizer configuration.
lr: 1.0e-4
