---
# Base checkpoint, dataset, and sequence-length cap.
model_name_or_path: Skywork/Skywork-Reward-Gemma-2-27B-v0.2
dataset_name: trl-lib/ultrafeedback_binarized
dataset_test_split: test
max_length: 1024

# Run identity and where outputs are written.
run_name: gemma-2-27b-Reward-uf
output_dir: outputs/reward/gemma-2-27b-Reward-uf

# Batching and distributed-training switches.
per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 2
gradient_checkpointing: true
ddp_find_unused_parameters: false

# Optimization schedule.
num_train_epochs: 1
learning_rate: 1.0e-4
lr_scheduler_type: cosine

# Logging, evaluation, and checkpoint cadence (all step-based).
logging_steps: 20
eval_strategy: steps
eval_steps: 200
save_strategy: steps
save_steps: 1000

# LoRA adapter settings.
use_peft: true
# The checkpoint is trained as a sequence classifier (note the `score` head
# below), so the adapter task type must be SEQ_CLS. When this key is omitted,
# TRL's ModelConfig is documented to default to CAUSAL_LM, which its
# reward-modeling script warns can cause silent bugs.
lora_task_type: SEQ_CLS
lora_r: 32
lora_alpha: 32
# Modules listed here are saved in full alongside the adapter weights.
lora_modules_to_save:
  - score
# Attention projections that receive LoRA adapters.
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj