# Kahneman-Tversky Optimization (KTO)
# identifier of this loss configuration; kept in agreement with the KTO trainer selected below
name: kto

# the temperature parameter for KTO; lower values mean we care less about the reference model
beta: 0.1

# name of the trainer used with this loss
trainer: KTOTrainer

# name of the dataloader used with this loss
dataloader: UnpairedPreferenceDataLoader

# a reference model is used with this loss (beta above controls how much it matters)
use_reference_model: true

# how much to weigh the losses of desirable examples (when dataset is imbalanced)
desirable_weight: 1.0

# how much to weigh the losses of undesirable examples (when dataset is imbalanced)
undesirable_weight: 1.0

# NOTE(review): critic_coef is not a KTO hyperparameter and looks like a leftover
# from a PPO config; value kept unchanged -- confirm whether KTOTrainer reads it
# before removing.
critic_coef: 0.5
