# Run configuration: model/data locations first, then generation,
# optimisation and validation settings. The program that consumes these
# keys is not part of this file.

# Model checkpoint location.
model-name: /beegfs/scratch/user/<anonymized>/fcdpg-verl/DeepSeek-Prover-V1.5-SFT
# Alternative dataset paths (currently disabled):
# train-file: /home/<anonymized>/data/math-hard-distr-llama3.2-3B/train.parquet
# val-file: /home/<anonymized>/data/math-hard-distr-llama3.2-3B/val.parquet
train-file: /home/<anonymized>/data/lean/train.parquet
val-file: /home/<anonymized>/data/lean/val.parquet
# Sample caps are written as plain digits: underscore digit separators
# (200_000) are integers only under YAML 1.1; YAML 1.2 core-schema loaders
# read them as strings.
max-train-samples: 200000
max-val-samples: 1000
# NOTE(review): "{loss}" looks like a placeholder expanded by the consumer,
# presumably from the `loss` key at the bottom of this file - confirm.
output: /beegfs/scratch/user/<anonymized>/huggingface/lean_distr_{loss}_dsprover_1.5_sft
generation-length: 1024
# add-special-tokens-context: True
batch-size: 128
epochs: 1
# Explicit ".0" in the mantissa: YAML 1.1 resolvers (e.g. PyYAML) treat a
# bare "1e-6" as a string, while "1.0e-6" is a float under both 1.1 and 1.2.
learning-rate: 1.0e-6
warmup-steps: 50
scheduler-type: 'constant'
# NOTE(review): unclear whether this patience has any effect while
# scheduler-type is 'constant' - confirm against the consumer.
reduce-lr-on-plateau-patience: 1
micro-batch-size: 1
val-micro-batch-size: 8
# no-validate-on-start: true
validation-interval: 16
# Nested list: one inner list holding the single entry "reward".
track-features: [["reward"]]
metric-accumulation-samples: 2048
loss: alpha
# List-valued on purpose (single entry).
# NOTE(review): presumably the parameter of the `alpha` loss selected above
# - confirm.
alpha: [0.99]