# Model stanza: `target` is a dotted path naming the model class and `params`
# lists the values configured for it (presumably forwarded as constructor
# keyword arguments - confirm against the config loader).
model:
  target: Models.interpretable_diffusion.gaussian_diffusion.MoD
  params:
    seq_length: 24  # same value as `window` in both dataloader datasets
    feature_size: 28
    n_layer_enc: 4
    # 256 = 4 x 64. The earlier note "4 X 24" works out to 96 and no longer
    # matches this value. NOTE(review): presumably n_heads x per-head width.
    d_model: 256
    timesteps: 1000
    sampling_timesteps: 1000  # set equal to `timesteps` here
    loss_type: 'l2'
    beta_schedule: 'cosine'
    n_heads: 4
    mlp_hidden_times: 4
    attn_pd: 0.0
    resid_pd: 0.0
    kernel_size: 1
    padding_size: 0
    hidden_dim: 1024
    max_seq_len: 24  # set equal to `seq_length` here

# Training/optimisation settings. The code that consumes these keys is outside
# this file, so the notes below only restate relationships between the values.
solver:
  base_lr: 1.0e-5
  max_epochs: 25000
  results_folder: ./Checkpoints_energy
  gradient_accumulate_every: 2
  save_cycle: 2500  # equals max_epochs / 10
  ema:
    decay: 0.995
    update_interval: 10

  # Learning-rate scheduler: `target` is a dotted path to the scheduler class,
  # `params` lists the values configured for it.
  scheduler:
    target: engine.lr_sch.ReduceLROnPlateauWithWarmup
    params:
      factor: 0.5
      patience: 5000
      min_lr: 1.0e-5  # same value as solver.base_lr
      threshold: 1.0e-1
      threshold_mode: rel
      warmup_lr: 8.0e-4
      warmup: 500
      verbose: false

# Data loading settings. Both datasets use the same `target` class, the same
# CSV file, the same window length and the same seed; they differ in
# `proportion` and `period`, and the test dataset adds `style`/`distribution`.
dataloader:
  train_dataset:
    target: Utils.Data_utils.real_datasets.CustomDataset
    params:
      name: energy
      proportion: 1.0  # Set to rate < 1 if training conditional generation
      data_root: ./Data/datasets/energy_data.csv
      window: 24  # seq_length
      save2npy: true
      neg_one_to_one: true
      seed: 123
      period: train

  test_dataset:
    target: Utils.Data_utils.real_datasets.CustomDataset
    params:
      name: energy
      proportion: 0.9  # rate
      data_root: ./Data/datasets/energy_data.csv
      window: 24  # seq_length
      save2npy: true
      neg_one_to_one: true
      seed: 123
      period: test
      style: separate
      distribution: geometric
    # The next three keys are siblings of `params`, not members of it.
    # NOTE(review): presumably read straight from `test_dataset` by the
    # sampling code rather than passed to the dataset class - confirm before
    # re-indenting them.
    coefficient: 1.0e-2
    step_size: 5.0e-2
    sampling_steps: 250

  # Loader-level options shared by the datasets above.
  batch_size: 64
  sample_size: 256
  shuffle: true