# @package _global_
# Run-level settings. The meaning of each key is defined by the code that
# consumes this config (not visible in this file).
general:
    name: 'moses'
    gpus: [0]
    wandb: 'online'
    remove_h: true
    resume: null
    test_only: null

    # Validation / sampling cadence during training.
    check_val_every_n_epochs: 5
    val_check_interval: null
    sample_every_val: 1
    samples_to_generate: 100
    samples_to_save: 10
    chains_to_save: 0
    log_every_steps: 50

    # Same sampling knobs, with the `final_model_` prefix.
    final_model_samples_to_generate: 30000
    final_model_samples_to_save: 50
    final_model_chains_to_save: 1

# Training hyper-parameters.
train:
    n_epochs: 300
    batch_size: 256
    save_model: true
    # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. stock
    # PyYAML) only recognise exponent notation as a float when a '.' is
    # present; a bare `2e-4` would be loaded as the string '2e-4' there.
    lr: 2.0e-4
    num_workers: 4
# Model hyper-parameters.
model:
    n_layers: 12
    # Per-component weights. Per the original note, the current component
    # order is x, e, y, c, p (older configs used p, x, c, e, y).
    # NOTE(review): confirm the ordering against the consuming code.
    lambda_train: [1, 2, 0, 0, 0]
    # Keys are quoted on purpose: a bare `y` is a boolean literal in the
    # YAML 1.1 type spec, so a strict 1.1 parser would turn the key into
    # `true`. Quoting keeps every key a string under any parser.
    nu:
        'p': 2.5
        'x': 1
        'c': 1
        'e': 1.5
        'y': 1

    extra_features: null
    hidden_mlp_dims:
        'X': 256
        'E': 128
        'y': 256
        's': 256
    hidden_dims:
        'dx': 256
        'de': 64
        'dy': 128
        'n_head': 8
        'dim_ffX': 256
        'dim_ffE': 128
        'dim_ffy': 256

    # The `_delt`-suffixed settings use the same key layout as the
    # un-suffixed settings above.
    n_layers_delt: 1
    hidden_mlp_dims_delt:
        'X': 128
        'E': 64
        'y': 128
        's': 8
    hidden_dims_delt:
        'dx': 128
        'de': 32
        'dy': 64
        'n_head': 8
        'dim_ffX': 128
        'dim_ffE': 64
        'dim_ffy': 128
# Dataset selection and data-loading options.
dataset:
    name: 'moses'
    datadir: 'data/moses/'
    # NOTE(review): `remove_h` also appears under `general`; which of the two
    # the code reads is not visible from this file.
    remove_h: true
    random_subset: null
    pin_memory: true
    adaptive_loader: true
# Feature switches.
features:
    use_charges: false
    # Must stay quoted: an unquoted `no` is parsed as boolean false.
    charges_policy: 'no'