# Kahneman-Tversky Optimization (KTO) loss configuration

# Temperature parameter (beta) for KTO; lower values mean we care less about
# the reference model.
# NOTE(review): presumably scales the policy-vs-reference term inside the
# trainer's loss -- confirm against the trainer implementation.
beta: 0.1

# Name of the trainer used with this loss.
# NOTE(review): presumably resolved by name to a class in the training code --
# confirm before renaming.
trainer: KTOTrainer

# Name of the dataloader used with this loss.
# NOTE(review): the name suggests examples are labelled individually rather
# than as preferred/rejected pairs -- confirm against the loader.
dataloader: UnpairedPreferenceDataLoader

# Weight applied to the losses of desirable examples. Adjust this relative to
# the weight for undesirable examples when the dataset is imbalanced between
# the two.
desirable_weight: 1.0

# Weight applied to the losses of undesirable examples. Adjust this relative
# to the weight for desirable examples when the dataset is imbalanced between
# the two.
undesirable_weight: 1.0

# Whether to use the tokenwise KL estimator (disabled by default here).
# NOTE(review): the estimator used when this is false is not visible in this
# file -- see the trainer implementation.
tokenwise_KL: false