# pytorch_lightning==2.1.3
# Seeding is switched on with a boolean rather than a fixed integer seed.
# NOTE(review): presumably the library picks the seed itself when this is
# `true`, so runs started from this file alone may not be repeatable — pin an
# integer here if exact reproducibility is required; confirm.
seed_everything: true
# Trainer arguments (the file header pins pytorch_lightning==2.1.3).
# Keys set to null carry no explicit value in this file; presumably the
# library's defaults apply to them — verify against the Trainer signature.
trainer:
  accelerator: auto
  strategy: auto
  devices: auto
  num_nodes: 1
  precision: null
  callbacks:
    # Checkpointing: tracks the "Val/loss" metric, lower is better (mode: min),
    # and save_top_k is 1.
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        monitor: Val/loss
        verbose: true
        save_top_k: 1
        mode: min
    # Early stopping on the same "Val/loss" metric (mode: min).
    # NOTE(review): patience is 25, but validation is scheduled only every 250
    # epochs (check_val_every_n_epoch) and max_epochs is 5000, i.e. at most
    # 5000 / 250 = 20 validation runs. If patience counts validation checks
    # (as the Lightning EarlyStopping documentation describes), this callback
    # can never trigger with these values — confirm the intended patience or
    # validation interval.
    - class_path: pytorch_lightning.callbacks.EarlyStopping
      init_args:
        monitor: Val/loss
        patience: 25
        mode: min
  fast_dev_run: false
  # Training length is bounded by epochs here; max_steps is -1 (presumably
  # "no step limit" — verify) and max_time is unset.
  max_epochs: 5000
  min_epochs: null
  max_steps: -1
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: null
  limit_test_batches: null
  limit_predict_batches: null
  overfit_batches: 0.0
  val_check_interval: null
  # Validation cadence in epochs; both callbacks above monitor a validation
  # metric, so they presumably only see new values at this interval.
  check_val_every_n_epoch: 250
  num_sanity_val_steps: null
  log_every_n_steps: null
  enable_checkpointing: null
  enable_progress_bar: null
  enable_model_summary: null
  accumulate_grad_batches: 1
  gradient_clip_val: null
  gradient_clip_algorithm: null
  deterministic: null
  benchmark: null
  inference_mode: true
  use_distributed_sampler: true
  profiler: null
  detect_anomaly: false
  barebones: false
  plugins: null
  sync_batchnorm: false
  reload_dataloaders_every_n_epochs: 0
  default_root_dir: null
# Model hyperparameters. Key meanings are inferred from their names; verify
# against the model class that consumes this section.
model:
  num_tokens: 6
  dim_model: 8
  dim_feedforward: 128
  # dim_model (8) is evenly divisible by num_heads (4).
  num_heads: 4
  test_prompt_length: 6
  max_pred_length: 64
  num_decoder_layers: 2
  dropout_p: 0.1
  lr: 0.01
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # only resolve exponent notation as a float when the mantissa contains a
  # ".", so a bare "2e-4" would load as a string there. Same value: 0.0002.
  layer_norm_eps: 2.0e-4
  # Nested key that shares the section's name; its value is a plain string
  # (presumably selecting the architecture — verify).
  model: transformer
# Dataset settings. Key meanings are inferred from their names; verify against
# the data module that consumes this section.
data:
  # Grammar identifier, quoted to make the string type explicit.
  grammar: 'aNbNcN'
  max_length: 512
  # Per-split sizes: train, validation and test are all 512.
  num_train: 512
  num_val: 512
  num_test: 512
  batch_size: 128
# Top-level options outside the trainer/model/data sections; all are unset
# (null) in this file.
# NOTE(review): notes and tags look like free-form run metadata, and ckpt_path
# like an optional checkpoint to start from — confirm against the CLI that
# reads this file.
notes: null
tags: null
ckpt_path: null
