distributed_training:
  # Rule used to scale the learning rate and betas with the number of GPUs:
  #   false -> linear scaling (multiply by the number of GPUs)
  #   true  -> equal-variance scaling (multiply by the square root of the number of GPUs)
  equal_variance_scaling_strategy: true

trainer:
  checkpoint:
    resume_training: false  # when true, reload the model from the last epoch and continue training
    save_top_k: 5  # how many of the best-performing models to keep
    monitor: loss/val_loss  # metric used to judge model performance
    mode: min  # whether the monitored metric is to be minimized or maximized

  params:
    # Trainer arguments. Key names match PyTorch Lightning's `Trainer`.
    # NOTE(review): presumably forwarded to the Trainer as keyword arguments —
    # confirm in the training script.

    # Quoted so the colon-separated value stays a string if this is re-enabled.
    # max_time: "00:16:00:00"  # training time before the process is terminated

    max_epochs: ${epochs}  # maximum number of training epochs (interpolated from the `epochs` config value)
    # check_val_every_n_epoch: 1  # run validation set every n training epochs

    # How often to run validation within a training epoch:
    #   float = fraction of the training epoch (1.0 = once per epoch, 0.25 = four times per epoch)
    #   int   = every N training batches
    val_check_interval: 1.0

    # How much of each dataset to use (float = fraction, int = num_batches).
    # null = no limit is configured here.
    limit_train_batches: null  # training dataset
    limit_val_batches: null  # validation dataset
    limit_test_batches: null  # test dataset

    devices: -1  # number of GPUs to utilize (-1 means all available GPUs)
    accelerator: gpu  # hardware type to run on (the distribution method is set by `strategy` below)
    precision: 32  # floating point precision (bits)
    # amp_level: O2  # AMP optimization level
    # num_nodes: 1  # Number of nodes used for training

    # auto_scale_batch_size: false
    # auto_lr_find: false  # tunes LR before beginning training
    # terminate_on_nan: true  # terminates training if a nan is encountered in loss/weights

    num_sanity_val_steps: 1  # number of validation steps to run before training begins
    fast_dev_run: false  # runs 1 batch of train/val/test for sanity

    # accumulate_grad_batches: 1  # accumulates gradients every n batches
    # track_grad_norm: -1  # logs the p-norm for inspection
    gradient_clip_val: 5.0  # threshold used when clipping gradients
    gradient_clip_algorithm: norm  # [value, norm] method to clip gradients
    sync_batchnorm: true  # synchronize batch-norm statistics across devices
    strategy: ddp_find_unused_parameters_false  # distribution method: DDP with find_unused_parameters disabled

    # checkpoint_callback: true  # enable default checkpoint

    # NOTE(review): several of the commented-out options above (amp_level,
    # auto_scale_batch_size, auto_lr_find, terminate_on_nan, track_grad_norm,
    # checkpoint_callback) appear to be deprecated or removed in newer
    # Lightning releases — check the installed version before re-enabling them.

  overfitting:
    enable: false  # when true, run an overfitting test in place of regular training

    params:
      max_epochs: 150  # how many epochs to keep overfitting the same batches
      overfit_batches: 1  # how many batches to overfit on
