---
# Training configuration: Qwen2.5-1.5B-Instruct on the math-hard-distr
# train/val parquet splits, using the `js` loss.
# NOTE(review): list-valued keys (batch-size, learning-rate, track-features)
# look like per-run alternatives expanded by the launcher; confirm against
# the code that consumes this file.

# Model and data locations.
model-name: Qwen/Qwen2.5-1.5B-Instruct
train-file: /home/<anonymized>/data/math-hard-distr-qwen2.5-1.5B/train.parquet
val-file: /home/<anonymized>/data/math-hard-distr-qwen2.5-1.5B/val.parquet
# Quoted so the braces are always read as literal text. `{loss}` is presumably
# filled in by the consumer with the value of `loss` below; TODO confirm.
output: '/beegfs/scratch/user/<anonymized>/huggingface/math_hard_distr_{loss}_qwen2.5-1.5B'

# Generation and optimisation settings.
generation-length: 1024
batch-size: [128, 256, 512]
epochs: 1
# Written with an explicit `.0` mantissa: YAML 1.1 loaders (e.g. PyYAML) only
# resolve exponent notation as a float when the mantissa contains a dot, so a
# bare `1e-7` would load as the string '1e-7' instead of a number.
learning-rate: [1.0e-7, 5.0e-7]
scheduler-type: 'reduce_lr_on_plateau'
# Patience setting that accompanies the reduce_lr_on_plateau scheduler above.
reduce-lr-on-plateau-patience: 1
micro-batch-size: 2
val-micro-batch-size: 8

# Validation and metric tracking.
# NOTE(review): the unit of validation-interval (steps vs. batches) is not
# visible in this file; confirm against the trainer.
validation-interval: 16
# Nested list: a single entry whose value is itself the list ['reward'].
track-features: [['reward']]
metric-accumulation-samples: 2048

# Loss selector.
loss: js