# general settings
name: RealDenoising_Restormer
model_type: ImageCleanModel
scale: 1
num_gpu: 8  # set num_gpu: 0 for cpu mode
manual_seed: 100

# dataset and data loader settings
datasets:
  train:
    name: TrainSet
    type: Dataset_PairedImage
    dataroot_gt: ./Denoising/Datasets/train/SIDD/target_crops
    dataroot_lq: ./Denoising/Datasets/train/SIDD/input_crops
    geometric_augs: true

    filename_tmpl: '{}'
    io_backend:
      type: disk

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 8
    batch_size_per_gpu: 8

    ### -------------Progressive training--------------------------
    mini_batch_sizes: [8,5,4,2,1,1]             # Batch size per gpu   
    iters: [92000,64000,48000,36000,36000,24000]
    gt_size: 384   # Max patch size for progressive training
    gt_sizes: [128,160,192,256,320,384]  # Patch sizes for progressive training.
    ### ------------------------------------------------------------

    ### ------- Training on single fixed-patch size 128x128---------
    # mini_batch_sizes: [8]   
    # iters: [300000]
    # gt_size: 128   
    # gt_sizes: [128]
    ### ------------------------------------------------------------

    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  val:
    name: ValSet
    type: Dataset_PairedImage
    dataroot_gt: ./Denoising/Datasets/val/SIDD/target_crops
    dataroot_lq: ./Denoising/Datasets/val/SIDD/input_crops
    io_backend:
      type: disk

# network structures
network_g:
  type: Restormer
  inp_channels: 3
  out_channels: 3
  dim: 48
  num_blocks: [4,6,6,8]
  num_refinement_blocks: 4
  heads: [1,2,4,8]
  ffn_expansion_factor: 2.66
  bias: False
  LayerNorm_type: BiasFree
  dual_pixel_task: False


# path
path:
  pretrain_network_g: ~
  strict_load_g: true
  resume_state: ~

# training settings
train:
  total_iter: 300000
  warmup_iter: -1 # no warm up
  use_grad_clip: true

  # Split 300k iterations into two cycles. 
  # 1st cycle: fixed 3e-4 LR for 92k iters. 
  # 2nd cycle: cosine annealing (3e-4 to 1e-6) for 208k iters.
  scheduler:
    type: CosineAnnealingRestartCyclicLR
    periods: [92000, 208000]       
    restart_weights: [1,1]
    eta_mins: [0.0003,0.000001]   
  
  mixing_augs:
    mixup: true
    mixup_beta: 1.2
    use_identity: true

  optim_g:
    type: AdamW
    lr: !!float 3e-4
    weight_decay: !!float 1e-4
    betas: [0.9, 0.999]
  
  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1
    reduction: mean

# validation settings
val:
  window_size: 8
  val_freq: !!float 4e3
  save_img: false
  rgb2bgr: true
  use_image: false
  max_minibatch: 8

  metrics:
    psnr: # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 0
      test_y_channel: false

# logging settings
logger:
  print_freq: 1000
  save_checkpoint_freq: !!float 4e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
