# Format checks enforced on CI:
# 1. Comments must appear above each field.
# 2. There must be a blank line between each field.
# 3. Inline comments (after a field on the same line) are not allowed.
# 4. Indentation level is respected for nested fields.

# <folder_name>@<field_name>.<field_name>: <yaml_file_name>

# Config composition list. Each `<folder_name>@<field_name>: <yaml_file_name>` entry
# loads `<yaml_file_name>` from `<folder_name>` and places it under `<field_name>`.
defaults:
  # Model config: `hf_model` from the `model` folder, placed under `model`.
  - model@model: hf_model
  # Engine config: `fsdp` from the `engine` folder, placed under `engine`.
  - engine@engine: fsdp
  # Optimizer config: `fsdp` from the `optim` folder, placed under `optim`.
  - optim@optim: fsdp
  # This file's own values. NOTE(review): listed last, so under Hydra's defaults-list
  # ordering they presumably override the entries above; confirm.
  - _self_

# Dataset and batching configuration
data:
  # Global batch size
  train_batch_size: 256

  # Micro batch size per GPU; this is also the validation batch size
  micro_batch_size_per_gpu: 4

  # Maximum token length per GPU
  # NOTE(review): presumably only used when use_dynamic_bsz is enabled; confirm
  max_token_len_per_gpu: 8192

  # Whether to use dynamic batch sizing
  use_dynamic_bsz: True

  # Training data file (parquet)
  train_files: ~/data/gsm8k/train.parquet

  # Validation data file; null means none is configured
  val_files: null

  # Maximum number of training samples; set to -1 to use the full dataset
  train_max_samples: -1

  # Maximum number of validation samples; set to -1 to use the full dataset
  val_max_samples: -1

  # Multi-turn settings
  # Key for the messages list in multi-turn mode
  messages_key: messages

  # Key for the tools list in multi-turn mode
  tools_key: tools

  # Key for the flag that says whether to enable thinking in multi-turn mode
  enable_thinking_key: enable_thinking

  # Padding mode
  pad_mode: no_padding

  # Maximum sequence length; for right padding
  max_length: 1024

  # Truncation setting
  # NOTE(review): `error` presumably rejects over-length samples instead of cutting them; confirm
  truncation: error

  # Not implemented yet
  balance_dp_token: False

  # Custom class, identified by `path` and `name`
  # NOTE(review): presumably a user-supplied dataset class; confirm against the trainer
  custom_cls:

    # Path for the custom class; null means none is configured
    path: null

    # Name of the custom class; null means none is configured
    name: null

  # Whether to use shared memory
  # NOTE(review): presumably for staging the data files; confirm
  use_shm: False

  # Extra keyword arguments for apply_chat_template
  apply_chat_template_kwargs: {}

  # MultiTurnSFTDataset applies apply_chat_template to each turn separately and concatenates the
  # resulting `input_ids` into one sequence, which may not equal the result of applying
  # apply_chat_template to the whole message list at once.
  # For example, Qwen Thinking series models add <think></think> tags to the last turn; please check
  # your tokenizer chat template settings.
  # Set to True to ignore the input_ids mismatch and use the concatenated input_ids as the final input_ids.
  ignore_input_ids_mismatch: False

# Checkpoint configuration
checkpoint:

  # Dotted path of the config class this section corresponds to
  _target_: verl.trainer.config.CheckpointConfig

  # What to include in saved checkpoints.
  # With 'hf_model' you can save the whole model in HF format; for now only the sharded model
  # checkpoint is used, to save space.
  save_contents: ["model", "optimizer", "extra"]

  # For more flexibility, you can specify the contents to load from the checkpoint.
  # As written, this interpolates to the same list as save_contents.
  load_contents: ${checkpoint.save_contents}

# Trainer configuration
trainer:

  # Default local directory, built from the project and experiment names below
  # NOTE(review): presumably where checkpoints are written; confirm
  default_local_dir: checkpoints/${trainer.project_name}/${trainer.experiment_name}

  # Default HDFS directory; null means none is configured
  default_hdfs_dir: null

  # Project name; interpolated into default_local_dir
  project_name: gsm8k-sft

  # Experiment name; interpolated into default_local_dir
  experiment_name: test

  # Total number of training epochs
  total_epochs: 4

  # Total number of training steps
  # NOTE(review): null presumably means the step count is derived from total_epochs; confirm
  total_training_steps: null

  # Logging backends
  logger: [ 'console', 'wandb' ]

  # Random seed
  seed: 1

  # Checkpoint save frequency
  # NOTE(review): -1 presumably disables periodic saving; confirm
  save_freq: -1

  # Evaluation frequency
  # NOTE(review): -1 presumably disables periodic evaluation; confirm
  test_freq: -1

  # Maximum number of checkpoints to keep; set to null to keep all
  max_ckpt_to_keep: null

  # Resume mode: "auto", "disable", or "resume_path"
  # "auto": resume from last checkpoint if available
  # "disable": start from scratch
  # "resume_path": resume from a user-defined path
  resume_mode: auto

  # Path to resume training from (used when resume_mode is "resume_path" or "auto")
  resume_from_path: null

  # Device type to run on
  device: cuda

  # Number of nodes
  nnodes: 1

  # Number of GPUs per node
  n_gpus_per_node: 1
