# Token-conditioned supervised finetuning, in the style of Korbak et al.'s (2023) "Pretraining Language Models with Human Preferences."
# I.e., during training a <|good|> or <|bad|> control token is placed before the output; at inference, <|good|> is appended to the input.
# Identifier of this configuration.
name: csft

# Trainer selected by name; its implementation lives outside this file.
trainer: SFTTrainer

# Data loader selected by name; its implementation lives outside this file.
dataloader: ConditionalSFTDataLoader

# This setup does not request a reference model.
use_reference_model: false

# Control token paired with chosen examples -- presumably the "good" token
# described in the file's header comment; confirm against the data loader.
chosen_control_token: '<|good|>'

# Control token paired with rejected examples -- presumably the "bad" token
# described in the file's header comment; confirm against the data loader.
rejected_control_token: '<|bad|>'