# @package _global_

# Training-loop settings; `_target_` names the class these keys are meant for
# (Hydra-style instantiation config).
# NOTE(review): per-option meanings below are inferred from the key names;
# confirm against pkg.logic.trainer.Trainer.
trainer:
  _target_: pkg.logic.trainer.Trainer
  # Hydra instantiate flag: nested configs are not instantiated recursively.
  _recursive_: false
  num_epochs: 200
  # presumably how many evaluations without improvement are tolerated before
  # stopping early — TODO confirm the unit (epochs vs. evaluations)
  early_stopping_tolerance: 10
  # presumably the max gradient norm used for clipping — confirm
  gradnorm_clipping_value: 1.0
  # 1 presumably means "log on every iteration" — confirm
  log_after_num_iterations: 1
  # List of evaluation modes; only `val` is enabled here.
  eval_modes:
    - val
  dry_run: false
# Model hyperparameters; `_target_` names the class these keys are meant for.
# NOTE(review): per-option meanings below are inferred from the key names;
# confirm against pkg.model.KTST.Model.
model:
  _target_: pkg.model.KTST.Model
  # Hydra instantiate flag: nested configs are not instantiated recursively.
  _recursive_: false
  # presumably the embedding/hidden width; 128 is divisible by both head
  # counts below (8 and 4) — confirm whether the model requires that
  d_model: 128
  # `_tf` / `_agg` suffixes presumably distinguish the transformer from the
  # aggregation module — confirm
  nhead_tf: 8
  nhead_agg: 4
  num_layers_tf_dec: 3
  num_layers_tf_enc: 5
  num_layers_agg: 2
  dim_feedforward: 512
  dropout: 0.05
  dim_classifier: 256
  # String selector; the set of valid values is defined in the model code,
  # which is not visible from this file.
  aggregation: self_attn_all
  use_bias_emb: true
  use_zero_init: true
  # NOTE(review): with false, both encoder and decoder layer counts above are
  # presumably used — confirm
  use_decoder_only: false
  # String selector for the attention variant; valid values are defined in the
  # model code, which is not visible from this file.
  attn_variant: learnable_alibi_monotonic_q_k
# Dataset / loader settings; `_target_` names the class these keys are meant for.
# NOTE(review): per-option meanings below are inferred from the key names;
# confirm against pkg.data.pykt.Data.
data:
  # String selector for the data representation; valid values are defined in
  # the data code, which is not visible from this file.
  format: set_dense
  _target_: pkg.data.pykt.Data
  # Hydra instantiate flag: nested configs are not instantiated recursively.
  _recursive_: false
  dataset: algebra2005
  # presumably the training batch size; validation uses `batch_size_val` — confirm
  batch_size: 128
  batch_size_val: 64
  # presumably the index of the fold held out for validation — confirm
  # whether it is 0- or 1-based
  val_fold_idx: 3
# Optimizer settings; `_target_` points at torch.optim.Adam.
optimizer:
  _target_: torch.optim.Adam
  # Hydra instantiate flag: nested configs are not instantiated recursively.
  _recursive_: false
  # Learning rate and weight decay are both set to 5e-5.
  lr: 5.0e-05
  weight_decay: 5.0e-05
  # NOTE(review): `theta_lr` is not a torch.optim.Adam constructor argument, so
  # it is presumably read (and removed) by project code before the optimizer is
  # built, e.g. as a separate learning rate for some parameter group — confirm.
  theta_lr: 0.0005
# Run-level settings shared across components.
# NOTE(review): absolute, machine-specific path; it will likely need to be
# overridden when running in another environment.
data_path: /home/knowledge-tracing/data
debug: false
# Random seed for the run.
seed: 93
# Device string; `cuda` requires a GPU-enabled environment.
device: cuda
