# Flat, dotted-key settings grouped by prefix: trainer.* (hardware layout),
# data.* (input/output files), model.* (checkpoint), rollout.* (sampling and
# inference-engine limits). Apparently the configuration of a generation job.
# NOTE(review): the exact meaning of every key is defined by the consuming
# script, which is not visible here; interpretive remarks below are hedged.
#
# Hardware layout: 1 node, 4 GPUs per node.
trainer.nnodes: 1
trainer.n_gpus_per_node: 4
#
# Input data. The commented-out line is an alternative input file that is
# currently disabled; the active one is the "train-unsolved-128" parquet file.
# NOTE(review): YAML does not expand $HOME — the value is the literal string;
# presumably the consumer or a shell expands it. Verify.
# data.path: $HOME/data/math-hard/train.parquet
data.path: $HOME/data/math-hard/train-unsolved-128.parquet
# Name of the field holding the prompt (presumably a parquet column — confirm).
data.prompt_key: prompt
# Both set to 1024. Presumably prompts per batch and samples drawn per prompt
# — TODO confirm against the consumer.
data.batch_size: 1024
data.n_samples: 1024
# Destination parquet file for the generated output.
data.output_path: "/beegfs/scratch/user/<anonymized>/reasoning/train_generations/lean_unsolved.parquet"
#
# Model checkpoint: a local directory is active; the commented-out line is an
# alternative model identifier that is currently disabled.
model.path: /beegfs/scratch/user/<anonymized>/fcdpg-verl/DeepSeek-Prover-V1.5-SFT
# model.path: Qwen/Qwen2.5-1.5B-Instruct
#
# Sampling parameters. With temperature 1, top_p 1 and top_k -1 the sampling
# distribution is presumably left unscaled and untruncated (-1 is the usual
# "top-k disabled" sentinel in vLLM-style samplers) — TODO confirm.
rollout.temperature: 1
rollout.top_p: 1
rollout.top_k: -1
# Length limits for the prompt and the generated response; presumably counted
# in tokens — confirm units.
rollout.prompt_length: 1024
rollout.response_length: 1024
# Presumably: no tensor-parallel sharding (size 1) and the inference engine
# may claim up to 0.9 of each GPU's memory — verify against the engine docs.
rollout.tensor_model_parallel_size: 1
rollout.gpu_memory_utilization: 0.9
# The trailing inline comment on the next line records 65536 as an alternative
# value for this setting.
rollout.max_num_batched_tokens: 98304 #65536
# NOTE(review): the value is the quoted string "true", not a YAML boolean.
# Confirm the consumer expects a string here before changing the quoting.
rollout.calculate_log_probs: "true"