# Hydra defaults list: the configs composed to build the final run config.
# NOTE(review): `_self_` is listed first, so under Hydra's composition order
# the group configs below are merged after this file and win on conflicting
# keys — confirm that precedence is intended.
defaults:
  - _self_
  # Each entry below uses the `_global_` package, so the selected config's
  # keys land at the root of the composed config rather than under the
  # group name.
  - model_cfg@_global_: qwen25/qwen1bi
  - data_cfg@_global_: r1_countdown
  - trainer_cfg@_global_: grpo
  - run_cfg@_global_: default

# Saving and Hub publishing options: save on a step schedule, every 25 steps,
# without pushing to the Hub.
# NOTE(review): key names look like Hugging Face TrainingArguments fields —
# presumably forwarded to the trainer; confirm against the consumer.
save_strategy: steps
save_steps: 25
push_to_hub: false
# Explicit null rather than a bare empty value, so the "unset" intent is
# unambiguous; the parsed value is the same.
# NOTE(review): if the consumer expects a list, `[]` may be the intended
# empty value — confirm.
tags: null

# Per-device batch size and vLLM settings.
# NOTE(review): presumably consumed by the trainer selected in the defaults
# list — confirm against the consumer.
per_device_train_batch_size: 1
# Fraction (0.3), not a percentage.
vllm_gpu_memory_utilization: 0.3
vllm_mode: colocate

# Logging cadence (every 5 steps) and Weights & Biases naming.
logging_strategy: steps
logging_steps: 5
report_to: wandb
wandb_project: rl4lm
# Group and run names are interpolated from data_log_name, model_log_name and
# trainer_log_name, which are not defined in the visible part of this file —
# presumably supplied by the configs in the defaults list; verify.
wandb_group_name: ${data_log_name}/${model_log_name}
wandb_run_name: ${trainer_log_name}

# Output location, built as
# <results_dir>/<wandb_group_name>/<wandb_run_name>/<exp_name>.
results_dir: results
# Timestamp-based experiment name: the date (%Y.%m.%d) is immediately followed
# by the time (%H%M%S) with no separator between the two.
# NOTE(review): confirm the missing separator is intentional.
exp_name: ${now:%Y.%m.%d}${now:%H%M%S}
output_dir: ${results_dir}/${wandb_group_name}/${wandb_run_name}/${exp_name}

# Unset (null) by default.
call_post_training: null

# Unset by default; written as an explicit null for consistency with
# call_post_training instead of a bare empty value. The parsed value is the
# same.
resume_from: null

# Seed value for the run.
seed: 42

# Hydra runtime settings.
hydra:
  run:
    # Point Hydra's run directory at the experiment's output_dir so Hydra's
    # own outputs share the location built from results_dir, the W&B names
    # and exp_name.
    dir: ${output_dir}

