# Direct Preference Optimization (DPO)

# Temperature parameter (beta) for DPO. Lower values mean we care less about
# the reference model; higher values weight it more heavily.
# Written as a plain scalar, so it parses as a float.
beta: 0.1

# Name of the trainer to use.
# NOTE(review): presumably resolved to a class with this name by the code that
# loads this config -- confirm against the consumer.
trainer: DPOTrainer

# Name of the data loader to use.
# NOTE(review): the name suggests it serves paired (preferred/dispreferred)
# examples -- confirm against the loader's implementation.
dataloader: PairedPreferenceDataLoader