# Search strategy: `grid` enumerates every combination of the multi-valued
# entries listed under `parameters`.
method: grid

# Objective reported by the training script that the sweep ranks runs by.
metric:
  goal: maximize
  name: validation/valid/accuracy/deeper_val

# Entry-point script; substituted for ${program} in `command` below.
program: main.py

# Launch command for each run. The ${...} placeholders are sweep macros
# expanded by the agent (presumably W&B: ${env} -> environment launcher,
# ${args} -> the chosen hyperparameters as command-line flags) -- confirm
# against the sweep tool's documentation.
command:
  - ${env}
  - python3
  - ${program}
  - ${args}
# Hyperparameters passed to the program for each run.
# Entries with `value` are fixed; entries with `values` are swept. With
# `method: grid` every combination of the swept entries is enumerated.
# Swept here: transformer.encoder_n_layers (5) x ctl.reversed (2) x
# sweep_id_for_grid_search (5).
parameters:
  log:
    value: wandb
  profile:
    value: trafo_scan
  task:
    value: ctl_trafo_classifier
  stop_after:
    value: 30000
  # Swept: encoder depth.
  transformer.encoder_n_layers:
    values:
      - 12
      - 10
      - 8
      - 6
      - 4
  transformer.variant:
    value: ndr_geometric
  transformer.n_heads:
    value: 1
  transformer_classifier.result_column:
    value: last
  # Swept: both settings of the flag are evaluated.
  ctl.reversed:
    values:
      - 0
      - 1
  lr:
    value: 1.5e-4
  batch_size:
    value: 512
  state_size:
    value: 256
  dropout:
    value: 0.5
  transformer.attention_dropout:
    value: 0.1
  wd:
    value: 0.01
  ctl.n_more_depth_valid:
    value: 3
  ctl.n_more_depth:
    value: 5
  optimizer:
    value: adamw
  transformer.ff_multiplier:
    value: 2
  length_bucketed_sampling:
    value: 1
  # NOTE(review): this key used to appear twice in this mapping (first with
  # value 0, later with value 1). Duplicate keys are invalid YAML; loaders
  # that tolerate them keep the last entry, so 1 was the effective value and
  # is the one retained. Confirm that 1 is what was intended.
  amp:
    value: 1
  ndr.drop_gate:
    value: 0.1
  trafo_classifier.norm_att:
    value: 0
  wandb_bug_workaround:
    value: 1
  # Swept over five distinct ids; presumably used only to repeat every grid
  # point five times (e.g. different seeds) -- verify against main.py.
  sweep_id_for_grid_search:
    distribution: categorical
    values:
      - 1
      - 2
      - 3
      - 4
      - 5
