---

# Model
# Architecture settings. How each key is consumed is defined by the code that
# loads this file (not visible here) — confirm there before changing values.
model: LSTM
input_size: 64
hidden_size: 256
num_layers: 4

# Training
# NOTE(review): the meanings noted below are inferred from key names only; the
# loader that consumes this file is not visible here — confirm before relying
# on them.
amp: false  # presumably toggles automatic mixed precision
batch_size: 256
length_budget: 16777216  # 256 ** 2 * 256
eval_length_budget: 16777216  # 256 ** 2 * 256
max_train_steps: 800000
eval_data_size: 30000
num_workers: 8
device: cuda
optim: Adam
# Presumably keyword arguments for the optimizer named in `optim` — confirm.
optim_args:
  lr: 0.001
lr_sched: StepLR
# Presumably keyword arguments for the scheduler named in `lr_sched`. If that
# maps to PyTorch's StepLR, the learning rate is multiplied by `gamma` once
# every `step_size` scheduler steps — TODO confirm.
lr_sched_args:
  step_size: 100000
  gamma: 0.5
# The `*_interval` keys are presumably counted in training steps — TODO confirm.
summary_interval: 250
num_wrong_summary: 20
eval_interval: 20000
ckpt_interval: 10000
