# Tokenizer class or path. If null, it is inferred from the model.
tokenizer: null

# Whether to use shared memory for data loading. Disabled by default.
use_shm: False

# Training set parquet file(s). Can be a single path or a list of paths.
# All files are read into memory, so the data can't be too large (< 100GB).
# Each path can be either a local path or an HDFS path.
# For an HDFS path, the provided utils download it to DRAM and convert it to a local path.
train_files: ~/data/rlhf/gsm8k/train.parquet

# Validation set parquet file(s). Can be a single path or a list of paths.
val_files: ~/data/rlhf/gsm8k/test.parquet

# Name of the dataset field that holds the prompt. Default is 'prompt'.
prompt_key: prompt

# Name of the dataset field used to select the reward function
# (relevant when different examples use different reward functions).
reward_fn_key: data_source

# Maximum prompt length. All prompts are left-padded to this length.
# An error is reported if a prompt is longer than this.
# oc.select: this value is the default for rollout.prompt_length.
max_prompt_length: 512

# Maximum response length. Rollout in RL algorithms (e.g. PPO) generates up to this length.
# oc.select: this value is the default for rollout.response_length.
max_response_length: 512

# Batch size sampled for one training iteration of different RL algorithms.
train_batch_size: 1024

# Batch size used during validation. Can be null.
val_batch_size: null

# Batch size used during hard validation. Can be null.
# NOTE(review): "hard validation" is not defined in this file; presumably it is
# validation on a hard-example subset (cf. the *_hard_indices lists at the end
# of this file) — confirm against the consumer.
hard_val_batch_size: null

# Whether to return the original input_ids without the chat template applied.
# This is used when the reward model's chat template differs from the policy's.
# If using a model-based RM with a different template, set this to True.
return_raw_input_ids: False

# Whether to return the original chat (prompt) without applying the chat template.
return_raw_chat: False

# Whether to return the full prompt with the chat template applied.
return_full_prompt: False

# Whether to shuffle the data in the dataloader.
shuffle: True

# Number of dataloader workers.
dataloader_num_workers: 8

# Whether to shuffle the validation set.
validation_shuffle: False

# Whether to filter out overlong prompts.
filter_overlong_prompts: False

# Number of workers used for filtering overlong prompts.
# For large-scale datasets, filtering can be time-consuming;
# multiprocessing speeds it up. Default is 1.
filter_overlong_prompts_workers: 1

# How to handle input_ids or prompts that exceed max_prompt_length.
# Options: 'error', 'left', 'right', 'middle'. Default is 'error'.
truncation: error

# The field in the multi-modal dataset where the image is located. Default is 'images'.
image_key: images

# The field in the multi-modal dataset where the video is located. Default is 'videos'.
video_key: videos

# If the remote tokenizer ships a Python file, this flag determines whether it may be used.
trust_remote_code: False

# Optional: path and name of a custom dataset class that overrides the default loading behavior.
custom_cls:

  # Path to the file containing the customized dataset class.
  # If not specified (null), the pre-implemented dataset is used.
  path: null

  # Name of the dataset class within the specified file.
  name: null

# Whether the dataset returns multi-modal inputs.
# Set to False if rollout generates new multi-modal inputs.
return_multi_modal_inputs: True

# Settings related to the data sampler.
sampler:

  # Path to the module containing a curriculum class that implements the
  # AbstractSampler interface.
  class_path: null

  # Name of the curriculum class, e.g. `MySampler`.
  class_name: null

# Data generation configuration for augmenting the dataset.
datagen:

  # Path to the file containing the customized data generation class.
  # E.g. 'pkg://verl.experimental.dynamic_dataset.dynamicgen_dataset'
  path: null

  # Name of the data generation class within the specified file.
  # E.g. 'MockDataGenerator'
  name: null

# MATH-500 indices
# math_hard_indices: [4, 6, 15, 18, 34, 36, 37, 41, 45, 64, 66, 85, 92, 100, 120, 127, 133, 136, 149, 160, 161, 162, 166, 168, 202, 215, 243, 247, 256, 260, 270, 320, 361, 367, 381, 392, 396, 411, 450, 451, 452, 460, 496, 501, 503, 505, 511, 513, 520, 534, 563, 564, 571, 576, 579, 587, 596, 601, 607, 609, 612, 615, 622, 666, 673, 683, 684, 695, 700, 703, 709, 718, 722, 738, 748, 757, 761, 762, 782, 805, 817, 834, 840, 849, 853, 854, 859, 882, 885, 888, 906, 909, 933, 941, 962, 978, 985, 988, 991, 1008, 1033, 1037, 1046, 1048, 1054, 1058, 1067, 1073, 1085, 1088, 1095, 1111, 1119, 1123, 1127, 1128, 1131, 1136, 1144, 1145, 1150, 1172, 1173, 1180, 1188, 1190, 1194, 1196, 1215, 1243, 1250, 1251, 1258, 1262, 1271, 1281, 1285, 1287, 1290, 1302, 1308, 1311, 1312, 1322, 1339, 1359, 1374, 1380, 1402, 1441, 1442, 1449, 1513, 1531, 1540, 1543, 1552, 1555, 1576, 1603, 1612, 1620, 1690, 1710, 1715, 1730, 1764, 1767, 1769, 1788, 1790, 1791, 1801, 1806, 1820, 1842, 1843, 1880, 1890, 1897, 1901, 1905, 1908, 1932, 1935, 1940, 1963, 1967, 1981, 1996, 2001, 2006, 2011, 2041, 2047, 2053, 2057, 2062, 2063, 2078, 2110, 2119, 2120, 2143, 2148, 2150, 2151, 2170, 2186, 2191, 2196, 2199, 2210, 2214, 2215, 2217, 2231, 2236, 2237, 2238, 2246, 2253, 2263, 2264, 2275, 2289, 2294, 2297, 2303, 2311, 2323, 2324, 2325, 2327, 2328, 2334, 2352, 2359, 2360, 2371, 2382, 2384, 2397, 2404, 2409, 2413, 2416, 2473, 2505, 2512, 2515, 2522, 2536, 2539, 2546, 2569, 2571, 2579, 2602, 2607, 2609, 2611, 2622, 2628, 2637, 2647, 2681, 2682, 2700, 2707, 2731, 2752, 2758, 2767, 2799, 2802, 2808, 2816, 2838, 2851, 2863, 2868, 2876, 2883, 2896, 2907, 2937, 2938, 2946, 2966, 2977, 2991, 2994, 3018, 3019, 3020, 3022, 3024, 3035, 3037, 3046, 3047, 3058, 3067, 3072, 3079, 3080, 3105, 3126, 3134, 3141, 3165, 3181, 3186, 3187, 3196, 3200, 3210, 3220, 3226, 3236, 3240, 3246, 3287, 3295, 3299, 3317, 3320, 3323, 3334, 3341, 3342, 3344, 3350, 3352, 3365, 3366, 3369, 3375, 3392, 3404, 3411, 3417, 3419, 3420, 3440, 3444, 3447, 
#   3460, 3467, 3474, 3480, 3498, 3507, 3511, 3519, 3529, 3539, 3541, 3548, 3549, 3569, 3586, 3604, 3607, 3646, 3647, 3658, 3669, 3700, 3711, 3725, 3730, 3732, 3738, 3740, 3741, 3752, 3768, 3769, 3773, 3779, 3802, 3805, 3824, 3849, 3856, 3878, 3913, 3923, 3941, 3942, 3951, 3982, 3990, 3994, 3999, 4011, 4034, 4036, 4042, 4043, 4046, 4055, 4071, 4074, 4088, 4090, 4104, 4108, 4127, 4149, 4150, 4155, 4157, 4158, 4160, 4177, 4181, 4190, 4193, 4210, 4222, 4235, 4242, 4253, 4265, 4272, 4279, 4297, 4303, 4315, 4326, 4333, 4352, 4368, 4384, 4404, 4413, 4423, 4425, 4441, 4449, 4451, 4479, 4487, 4500, 4515, 4523, 4533, 4535, 4547, 4549, 4550, 4569, 4584, 4590, 4591, 4597, 4600, 4603, 4610, 4626, 4657, 4666, 4678, 4697, 4706, 4713, 4731, 4744, 4751, 4753, 4758, 4765, 4776, 4796, 4812, 4834, 4850, 4857, 4861, 4866, 4868, 4871, 4885, 4896, 4900, 4909, 4914, 4924, 4926, 4947, 4955, 4964, 4969, 4978, 4990, 4992, 4993]
# 50 hardest examples according to GPT-4
# [414, 333, 91, 295, 7, 758, 213, 595, 635, 427, 106, 437, 20, 724, 95, 525, 319, 594, 390, 447, 479, 616, 753, 620, 29, 230, 423, 50, 391, 249, 220, 23, 617, 770, 542, 683, 346, 564, 136, 352, 739, 382, 454, 734, 730, 121, 107, 727, 548, 361]
# gsm8k_hard_indices: [368, 454, 119, 406, 403, 380, 255, 12, 89, 87, 37, 75, 409, 187, 62, 423, 298, 236, 267, 7, 234, 439, 428, 359, 510, 353, 394, 197, 102, 340, 307, 443, 245, 201, 147, 241, 209, 453, 322, 494, 124, 306, 330, 107, 427, 508, 205, 199, 493, 304]
# Task / dataset identifier.
# NOTE(review): undocumented; presumably selects task-specific behavior
# (e.g. which of the hard-index lists above applies) — confirm against the consumer.
task: gsm8k

# NOTE(review): undocumented flag; the name suggests that samples with no
# advantage signal are dropped when True — confirm against the trainer code.
drop_samples_with_no_adv: False