# Kahneman-Tversky Optimization with a zero reward reference point (de facto similar to unlikelihood training by Welleck et al. (2019))
# DO NOT USE kto-zero in practice: this is just for understanding the importance of the KL term

# --- component selection ---

# Trainer selected by this config.
trainer: KTOZeroTrainer

# Data loader selected by this config.
dataloader: UnpairedPreferenceDataLoader

# --- loss hyperparameters ---

# KTO temperature. The smaller this value, the less emphasis is placed on the
# reference model.
beta: 0.1

# Multiplier applied to the loss of desirable examples; adjust it when the
# dataset is imbalanced.
desirable_weight: 1.0

# Multiplier applied to the loss of undesirable examples; adjust it when the
# dataset is imbalanced.
undesirable_weight: 1.0

# Whether to use the tokenwise KL estimator.
tokenwise_KL: false