# Value for the CLI's `seed_everything` option (presumably seeds the RNGs for
# reproducible runs; confirm against the entry-point script).
seed_everything: 42

# ---------------------------- TRAINER -------------------------------------------
trainer:
  # Base output directory. The logger `save_dir` and the ModelCheckpoint
  # `dirpath` in this file are built from it via ${trainer.default_root_dir}.
  default_root_dir: /path/to/root_dir

  # fp16 mixed precision. "16-mixed" is the explicit Lightning 2.x spelling;
  # the bare value 16 is a legacy alias kept only for backward compatibility.
  precision: "16-mixed"

  # Hardware layout: single node, GPU accelerator, DDP with unused-parameter
  # detection enabled. `auto` lets Lightning pick the devices for the chosen
  # accelerator; pass an int or a list of indices to restrict it.
  devices: auto
  num_nodes: 1
  accelerator: gpu
  strategy: ddp_find_unused_parameters_true

  # Training length bounds (the EarlyStopping callback configured in this file
  # may end the run before max_epochs).
  min_epochs: 1
  max_epochs: 50
  enable_progress_bar: true

  sync_batchnorm: true
  enable_checkpointing: true
  num_sanity_val_steps: 1

  # debugging
  fast_dev_run: false

  # Weights & Biases experiment logger. The run name doubles as the name of the
  # output sub-directory under trainer.default_root_dir.
  logger:
    class_path: lightning.pytorch.loggers.wandb.WandbLogger
    init_args:
      name: monthly_infilling_unet_2m_temperature_8bs_5e-4_lr
      project: atmos-arena
      save_dir: "${trainer.default_root_dir}/monthly_infilling_unet_2m_temperature_8bs_5e-4_lr"

  callbacks:
    # Record the learning rate at every optimizer step.
    - class_path: lightning.pytorch.callbacks.LearningRateMonitor
      init_args:
        logging_interval: step

    # Keep the single best checkpoint according to the monitored metric, plus
    # the most recent one.
    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
      init_args:
        dirpath: '${trainer.default_root_dir}/monthly_infilling_unet_2m_temperature_8bs_5e-4_lr/checkpoints'
        filename: 'epoch_{epoch:03d}'
        auto_insert_metric_name: false
        # Logged metric that decides which checkpoint is "best".
        monitor: 'val/w_mse_0.5'
        # "min": a lower metric value is better ("max" would mean the opposite).
        mode: min
        # Number of best checkpoints to keep, ranked by the metric above.
        save_top_k: 1
        # Additionally always save the checkpoint from the last epoch.
        save_last: true
        verbose: false

    # Stop training once the monitored metric has stopped improving.
    - class_path: lightning.pytorch.callbacks.EarlyStopping
      init_args:
        # Logged metric used to judge improvement.
        monitor: 'val/w_mse_0.5'
        # "min": a lower metric value is better ("max" would mean the opposite).
        mode: min
        # Validation epochs without improvement tolerated before stopping.
        patience: 10
        # Minimum change in the metric that counts as an improvement.
        min_delta: 0.0

    # Rich-formatted model summary; max_depth -1 presumably expands every
    # nested module (confirm against the Lightning ModelSummary docs).
    - class_path: lightning.pytorch.callbacks.RichModelSummary
      init_args:
        max_depth: -1

    - class_path: lightning.pytorch.callbacks.TQDMProgressBar

# ---------------------------- MODEL -------------------------------------------
model:
  # Optimizer settings (beta_1/beta_2 are presumably Adam-style betas; confirm
  # against the model module). Exponent-form floats carry an explicit ".0"
  # because YAML 1.1 loaders such as PyYAML resolve a dot-less `5e-4` as a
  # string rather than a float.
  lr: 5.0e-4
  beta_1: 0.9
  beta_2: 0.95
  weight_decay: 1.0e-5

  # Learning-rate schedule settings (presumably warmup followed by annealing
  # down to eta_min; confirm). max_epochs carries the same value as
  # trainer.max_epochs and presumably has to stay in sync with it.
  warmup_epochs: 5
  max_epochs: 50
  warmup_start_lr: 1.0e-8
  eta_min: 1.0e-8

  # Empty string: no pretrained checkpoint path is supplied.
  pretrained_path: ""

  # Network: single-channel in/out U-Net with every attention flag disabled.
  net:
    class_path: unet_arch.Unet
    init_args:
      in_channels: 1
      out_channels: 1
      hidden_channels: 128
      activation: "leaky"
      norm: true
      dropout: 0.1
      ch_mults: [1, 2, 2, 4]
      is_attn: [false, false, false, false]
      mid_attn: false
      n_blocks: 2

# ---------------------------- DATA -------------------------------------------
data:
  root_dir: /path/to/data
  # Quoted so the digit-leading name is unambiguously a string.
  variable: "2m_temperature"

  # Range of mask ratios used during training.
  training_mask_ratio_min: 0.1
  training_mask_ratio_max: 0.9
  # Mask ratios used at evaluation time; 0.5 presumably corresponds to the
  # "val/w_mse_0.5" metric monitored by the callbacks (confirm).
  eval_mask_ratios:
    - 0.1
    - 0.3
    - 0.5
    - 0.7
    - 0.9

  # NOTE(review): the run name says "8bs" while batch_size is 1; presumably
  # this is per device and 8 is the total across devices. Confirm.
  batch_size: 1
  num_workers: 4
  pin_memory: false