# LoRA fine-tuning configuration for a reward model.
# NOTE(review): the key names match Hugging Face TRL's reward-modeling
# arguments (script args + ModelConfig + RewardConfig) -- confirm the consumer.

# --- Model and data ---
model_name_or_path: Skywork/Skywork-Reward-Gemma-2-27B-v0.2
dataset_name: datasets/hh-rlhf-helpful-base
dataset_test_split: test
max_length: 1024

# --- Output and run naming ---
output_dir: outputs/reward/gemma-2-27b-Reward-hh
run_name: gemma-2-27b-Reward-hh

# --- Batching and schedule ---
per_device_train_batch_size: 8
per_device_eval_batch_size: 8
gradient_accumulation_steps: 1
num_train_epochs: 1
learning_rate: 1.0e-4
lr_scheduler_type: cosine
gradient_checkpointing: true
ddp_find_unused_parameters: false

# --- Logging, evaluation, checkpointing ---
logging_steps: 20
eval_strategy: steps
eval_steps: 200
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
save_strategy: 'no'
# NOTE(review): with save_strategy 'no' this value is presumably ignored;
# kept so that switching save_strategy to `steps` has an interval ready.
save_steps: 1000

# --- PEFT / LoRA ---
use_peft: true
# The reward model is a sequence classifier (note the `score` head below), so
# the adapter task type must be SEQ_CLS. When this key is omitted, TRL's
# ModelConfig falls back to CAUSAL_LM, which its reward-modeling example warns
# leads to silent bugs.
lora_task_type: SEQ_CLS
lora_r: 32
lora_alpha: 32
# Modules listed here are kept fully trainable and saved with the adapter.
lora_modules_to_save:
  - score
# Attention projections that receive LoRA adapters.
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj