# Conservative Direct Preference Optimization (cDPO) configuration.

# Temperature parameter for cDPO; lower values mean we care less about the
# reference model.
beta: 0.1

# Proportion of preferences with the wrong label.
# NOTE(review): presumably must stay below 0.5 to be meaningful — confirm
# against the trainer that consumes it.
epsilon: 0.2

# Trainer selected by this config.
# NOTE(review): presumably resolved by name to a class in the training code —
# confirm.
trainer: CDPOTrainer

# Data loader selected by this config.
# NOTE(review): the name suggests paired preference examples — verify against
# the loader implementation.
dataloader: PairedPreferenceDataLoader