# Dotted import path of the class this config describes.
# NOTE(review): presumably consumed by Hydra's `instantiate`, with the
# remaining keys passed as constructor arguments -- confirm in the caller.
_target_: src.model.lm.LanguageModel

# Defaults list: picks the `hf_adamw` option for `optimizer` and the
# `fixed` option for `scheduler`.
# NOTE(review): presumably Hydra config groups resolved relative to this
# file's group -- verify that both option files exist.
defaults:
  - optimizer: hf_adamw
  - scheduler: fixed

# Model identity and input settings.
# NOTE(review): "kd" presumably stands for knowledge distillation -- confirm
# against the consuming class.
model_type: kd-lm
# Backbone identifier; presumably a pretrained checkpoint name -- verify.
arch: t5-base
# No auxiliary architecture is configured (explicit null).
aux_arch: null
# Presumably the maximum input length in tokens -- TODO confirm units.
model_max_length: 512
# Interpolation: takes its value from `data.dataset` in the composed config,
# so the dataset is not defined in this file.
dataset: ${data.dataset}

# `kd_*` settings. Exact semantics are defined by the consuming model code,
# which is not visible from this file.
# NOTE(review): presumably toggles applying KD to inputs / targets -- confirm.
kd_input: True
kd_target: True
# Criterion name; presumably mean-squared error -- confirm supported values.
kd_criterion: mse
# Presumably the multiplier applied to the KD loss term -- confirm.
kd_loss_wt: 1.0
# NOTE(review): meaning of "aux LM only" is not evident here; `aux_arch` is
# null in this file, so verify how the two settings interact.
aux_lm_only: False

# Negatively phrased switches: by their names, `True` presumably turns the
# named component OFF and `False` leaves it ON -- confirm in the model code.
# As written: task loss disabled, bottleneck kept.
no_task_loss: True
no_bottleneck: False

# Left unset (explicit null).
# NOTE(review): "ftr" presumably abbreviates "feature"; how null is handled
# (no dropout vs. a code-side default) must be checked in the consumer.
ftr_dropout_rate: null