# Dataset and batching settings.
data:
  train_batch_size: 2
  # Slated for deprecation; set micro_batch_size_per_gpu instead.
  micro_batch_size: null
  # Also used as the validation batch size.
  micro_batch_size_per_gpu: 1
  # NOTE(review): train_files and val_files name the same parquet file, so
  # validation would run on the training data — confirm this is intentional.
  train_files: sft_data.parquet
  val_files: sft_data.parquet
  prompt_key: question
  response_key: answer
  # Multi-turn settings.
  multiturn:
    # Turn on to use a multi-turn dataset.
    enable: true
    messages_key: messages
  custom_cls:
    path: null
    name: null
  max_length: 16384
  truncation: error
  balance_dp_token: false
  chat_template: null
  rllm:
    tokenize_and_mask_method: stepwise
# Model, FSDP, and LoRA settings.
model:
  partial_pretrain: Qwen/Qwen2.5-Math-7B-Instruct
  fsdp_config:
    wrap_policy:
      min_num_params: 0
    cpu_offload: false
    offload_params: false
  external_lib: null
  enable_gradient_checkpointing: true
  trust_remote_code: true
  # NOTE(review): presumably a rank of 0 leaves LoRA disabled, which would
  # make lora_alpha and target_modules inert — confirm against the trainer.
  lora_rank: 0
  lora_alpha: 16
  target_modules: all-linear
  use_liger: false
  strategy: fsdp2
# Optimizer hyperparameters.
optim:
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. plain
  # PyYAML) resolve a bare `1e-5` to the string "1e-5" instead of a float.
  lr: 1.0e-5
  betas: [0.9, 0.95]
  weight_decay: 0.1
  warmup_steps_ratio: 0.05
  clip_grad: 1.0
# Top-level settings that sit outside any section.
ulysses_sequence_parallel_size: 1
use_remove_padding: false
# Run, logging, resume, and checkpoint settings.
trainer:
  default_local_dir: outputs/qwen2.5_math_sft
  default_hdfs_dir: null
  # NOTE(review): this key looks like it overlaps with resume_from_path
  # further down — confirm which one the trainer actually reads.
  resume_path: null
  project_name: math-tool-sft
  experiment_name: qwen2.5-math-7b
  total_epochs: 3
  total_training_steps: null
  logger:
    - console
    - wandb
  seed: 1
  save_freq: -1
  test_freq: -1
  nnodes: 1
  n_gpus_per_node: 8
  # Maximum number of checkpoints to keep; null keeps all of them.
  max_ckpt_to_keep: null

  # Resume mode, one of:
  #   "auto"        - resume from the last checkpoint if one is available
  #   "disable"     - start from scratch
  #   "resume_path" - resume from a user-defined path
  resume_mode: auto

  # Path to resume training from; used when resume_mode is "resume_path"
  # or "auto".
  resume_from_path: null

  # Checkpoint configuration.
  checkpoint:
    # What to include in saved checkpoints. Adding 'hf_model' also saves the
    # whole model in HF format; as configured here, only the sharded model
    # checkpoint is saved, to save space.
    save_contents:
      - model
      - optimizer
      - extra

    # What to load from a checkpoint. It can be set independently of
    # save_contents for more flexibility; here it interpolates the same list.
    load_contents: ${trainer.checkpoint.save_contents}
  device: cuda