# @package _global_

# Hydra defaults list: selects one option from each of the `model`, `dataset`
# and `train` config groups (the leading `/` makes the group path absolute).
# NOTE(review): no `_self_` entry is listed, so whether the keys set in this
# file take precedence over these base configs depends on Hydra's default
# ordering for this kind of config — confirm against the Hydra version in use.
defaults:
  - /model: flat_enc_dec
  - /dataset: flat
  - /train: nlb
# Dataset settings for this experiment. Training and evaluation both list the
# same single session.
dataset:
  datasets:
    - odoherty_rtt-Indy-20160627_01
  eval_datasets:
    - odoherty_rtt-Indy-20160627_01
  # NOTE(review): this line used to carry the comment "no limit", which
  # contradicts the explicit value of 300 — confirm whether a cap of 300 is
  # intended or the value is a leftover.
  scale_limit_per_eval_session: 300
  # 288 = 9 * 32, i.e. a whole multiple of neurons_per_token below.
  max_channels: 288
  # Same value as model.neurons_per_token in this file — presumably the two
  # must agree; verify before changing either one.
  neurons_per_token: 32
# Model settings layered on the selected `model` base config.
model:
  # Same value as dataset.neurons_per_token in this file — presumably the two
  # must agree; verify before changing either one.
  neurons_per_token: 32
  hidden_size: 128
  # Dropout is set to the same value at the model level and inside the
  # transformer sub-config.
  dropout: 0.4
  transformer:
    dropout: 0.4
    n_heads: 2
  task:
    mask_ratio: 0.25
    # NOTE(review): two weights are given but only one task is enabled below
    # (the second is commented out) — confirm the extra weight is harmless.
    task_weights: [1.0, 1.0]
    tasks:
      - ModelTask.shuffle_infill
      # - ModelTask.heldout_decoding
    # Disabled metric overrides, kept for reference:
    # metrics:
    #   - Metric.bps
    #   - Metric.co_bps
    #   - Metric.block_co_bps
  # Learning-rate schedule lengths; units are presumably optimizer steps, as
  # the key names suggest — TODO confirm.
  lr_ramp_steps: 3000
  lr_decay_steps: 10000
# Training-loop settings layered on the selected `train` base config.
train:
  # Explicit batch size; batch-size autoscaling is switched off alongside it.
  batch_size: 64
  autoscale_batch_size: false
  # Presumably an early-stopping patience; the unit (epochs vs. steps) is not
  # visible in this file — TODO confirm.
  patience: 5000