# @package _global_

# Global run settings (merged at the config root via `@package _global_`).

# Application-level debug flag; disabled for sweeps.
debug: false

# Target device name handed to the application.
device: cuda

# Seed follows Hydra's job number (`hydra.job.num`), i.e. the serial number of
# the job inside the sweep, so each job of a multirun uses its own seed.
seed: '${hydra:job.num}'

# Config groups composed into this experiment.
defaults:
  # Model: the `ktst` option of the root-level `model` group.
  - /model: ktst
  # Dataset: mandatory (`???`) - there is no default, so a `data` option has to
  # be chosen by whoever launches this config.
  - /data: '???'
  # Swap Hydra's sweeper for the Optuna sweeper plugin.
  - override /hydra/sweeper: optuna

# Hydra runtime settings: this config always runs as a sweep (MULTIRUN) driven
# by the Optuna sweeper selected in the defaults list.
hydra:
  mode: MULTIRUN
  sweeper:
    sampler:
      # Fixed sampler seed so the sequence of suggested trials is repeatable.
      seed: 7
    # The objective value reported by each job is maximized.
    direction: maximize
    # Total number of trials in the study.
    n_trials: 100
    # Number of jobs launched per batch; 1 means trials run one after another.
    n_jobs: 1
    # Search space. Each value is a string in Hydra's override grammar:
    # `choice(...)` entries are swept, plain values are held fixed for every
    # trial. Keep the entry order stable - reordering may change what the
    # seeded sampler suggests for each parameter.
    params:
      # Mandatory (`???`): both must be supplied when this config is composed.
      model.aggregation: '???'
      model.attn_variant: '???'
      model.d_model: choice(128,256)
      model.nhead_tf: choice(4,8)
      model.nhead_agg: 4 # only for self_attn_all
      model.num_layers_agg: 2 # only for self_attn_all
      model.num_layers_tf_dec: choice(3,4,5,6,7,8)
      model.num_layers_tf_enc: choice(1,2,3,4,5,6)
      model.dim_feedforward: choice(256,512)
      model.dropout: choice(0.05,0.1,0.15,0.2,0.25)
      model.dim_classifier: choice(128,256)
      optimizer.lr: choice(0.00005,0.0001)
      optimizer.weight_decay: choice(0.00001,0.00005)
      data.batch_size: 128
      # `params` values must be override strings, not YAML collections: a nested
      # YAML list here is rejected by the sweeper's string-typed schema. The
      # override-grammar list literal below yields the same one-element list.
      trainer.eval_modes: '[val]'
      trainer.num_epochs: 200
