### model
# Model path handed to the consumer of this config.
# Alternative left by the original author for quick switching:
#   saves/qwem2.5-1.5b/full/qwen_orm_A_qwen_sed_sft_detach_1_1_no_step/checkpoint-12000
model_name_or_path: saves/gemma-2b/full/ulfeedback_train40K_reward_001sft_correct/checkpoint-5303

### method
# Stage selector. `orm_dg` is not explained anywhere in this file.
# NOTE(review): presumably a project-specific reward-model stage; confirm
# against the trainer code that consumes this config.
stage: orm_dg
# Canonical lowercase boolean (was `True`) so that YAML 1.1 and 1.2 loaders
# agree on the resolved type.
do_train: true
finetuning_type: full
# Alternative DeepSpeed config noted by the original author:
#   ds_z3_offload_config.json
deepspeed: examples/deepspeed/ds_z0_config.json

### dataset
# Dataset currently selected. Earlier choices left by the original author
# for quick switching:
#   qwen_test_G_BON_7B, BON_G_any, qwen_test_G, prm_test
dataset: no_step_qwen_test_G_BON_7B
# Matches the gemma-2b checkpoint path set in model_name_or_path.
template: gemma
# Must stay quoted: a plain value starting with `[` would be parsed as a
# flow sequence rather than a single comma-separated string.
new_special_tokens: '[STEP],[REQ],[SPACE]'

# Preprocessing limits and parallelism.
cutoff_len: 2048
max_samples: 1000000
preprocessing_num_workers: 16
overwrite_cache: true

### output
# NOTE(review): this directory sits under saves/qwem2.5-1.5b/ while
# model_name_or_path points at a gemma-2b checkpoint, and
# overwrite_output_dir is true. Confirm the target directory is intended
# before running.
output_dir: saves/qwem2.5-1.5b/full/orm_A_infer_G
overwrite_output_dir: true

# Step intervals for logging and checkpointing.
logging_steps: 10
save_steps: 500
plot_loss: true

### eval
# Step-based evaluation schedule; the interval equals save_steps (500).
eval_strategy: steps
eval_steps: 500
per_device_eval_batch_size: 8
# NOTE(review): presumably the fraction of the dataset held out for
# validation; confirm against the data loader.
val_size: 0.1
