# Tokenizer class or path. If null, it will be inferred from the model.
tokenizer: null

# Whether to use shared memory for data loading.
use_shm: False

# Training set parquet. Can be a list or a single file.
# The program will read all files into memory, so it can't be too large (< 100GB).
# The path can be either a local path or an HDFS path.
# For HDFS path, we provide utils to download it to DRAM and convert it to a local path.
train_files: ~/data/rlhf/gsm8k/train.parquet

# Validation parquet. Can be a list or a single file.
val_files: ~/data/rlhf/gsm8k/test.parquet

# The field in the dataset where the prompt is located. Default is 'prompt'.
prompt_key: prompt

# The field used to select the reward function (if using different ones per example).
reward_fn_key: data_source

# Maximum prompt length. All prompts will be left-padded to this length.
# An error will be reported if the length is too long.
# oc.select: default val for rollout.prompt_length
max_prompt_length: 512

# Maximum response length. Rollout in RL algorithms (e.g. PPO) generates up to this length.
# oc.select: default val for rollout.response_length
max_response_length: 512

max_start_length: 2048      # 新增
max_obs_length: 2560         # 新增

# Batch size sampled for one training iteration of different RL algorithms.
train_batch_size: 1024

# Batch size used during validation. Can be null.
val_batch_size: null

# Whether to return the original input_ids without adding chat template.
# This is used when the reward model's chat template differs from the policy.
# If using a model-based RM with different templates, this should be True.
return_raw_input_ids: False

# Whether to return the original chat (prompt) without applying chat template.
return_raw_chat: False

# Whether to return the full prompt with chat template.
return_full_prompt: False

# Whether to shuffle the data in the dataloader.
shuffle: True

# num dataloader workers
dataloader_num_workers: 8

# Whether to shuffle the validation set.
validation_shuffle: False

# Whether to filter overlong prompts.
filter_overlong_prompts: False

# Number of workers for filtering overlong prompts.
# For large-scale datasets, filtering can be time-consuming.
# Use multiprocessing to speed up. Default is 1.
filter_overlong_prompts_workers: 1

# Truncate the input_ids or prompt if they exceed max_prompt_length.
# Options: 'error', 'left', 'right', 'middle'. Default is 'error'.
truncation: error

# The field in the multi-modal dataset where the image is located. Default is 'images'.
image_key: images

# The field in the multi-modal dataset where the video is located.
video_key: videos

# If the remote tokenizer has a Python file, this flag determines whether to allow using it.
trust_remote_code: False

# Optional: specify a custom dataset class path and name if overriding default loading behavior.
custom_cls:

  # The path to the file containing your customized dataset class. If not specified, pre-implemented dataset will be used.
  path: null

  # The name of the dataset class within the specified file.
  name: null

# Whether to return multi-modal inputs in the dataset. Set to False if rollout generates new multi-modal inputs.
return_multi_modal_inputs: True

# settings related to data sampler
sampler:

  # the path to the module containing a curriculum class which implements the
  # AbstractSampler interface
  class_path: null

  # the name of the curriculum class like `MySampler`
  class_name: null

# Data generation configuration for augmenting the dataset.
datagen:

  # The path to the file containing your customized data generation class.
  # E.g. 'pkg://verl.experimental.dynamic_dataset.dynamicgen_dataset'
  path: null

  # The class name of the data generation class within the specified file.
  # E.g. 'MockDataGenerator'
  name: null

# Additional kwargs when calling tokenizer.apply_chat_template
apply_chat_template_kwargs: {}
