# Configuration for preference-based training (original note: "do DPO
# preference-based training").
# NOTE(review): this header mentions DPO, but `name` below is
# `constraintOptimization` — confirm which objective this file selects and
# update whichever of the two is stale.
# NOTE(review): keys mix camelCase (`deltaChosen`) and snake_case
# (`reference_free`); left as-is because consumers look these keys up by name.
name: constraintOptimization

# `???` appears to be the OmegaConf/Hydra "mandatory value" marker, i.e. no
# default is provided here and a value must be supplied elsewhere (override or
# composing config) — TODO confirm against the config loader in use.
# Presumably per-response terms for the chosen and rejected completions;
# exact meaning, units and valid range are not visible here — verify against
# the code that reads them.
deltaChosen: ???
deltaRejected: ???

# Also has no default in this file. Presumably a tolerance/slack term of the
# objective — verify against the consumer.
epsilon: ???

# Off by default. Presumably switches the objective to not use a reference
# model when true — TODO confirm.
reference_free: false