# @package _global_
# Run-level options grouped under `general`.
# NOTE(review): the per-group remarks below are inferred from key names only;
# confirm against the code that consumes this config.
general:
  name: 'moses_no_h_marginal'
  gpus: 3
  wandb: 'online'
  test_only: null
  evaluate_all_checkpoints: false

  # Validation cadence (presumably epochs between validation runs).
  check_val_every_n_epochs: 1
  val_check_interval: null

  # Sampling / logging during training.
  sample_every_val: 10
  samples_to_generate: 256
  samples_to_save: 20
  chains_to_save: 5
  log_every_steps: 50

  # Counterparts of the three sampling counts above for the final model.
  final_model_samples_to_generate: 25000
  final_model_samples_to_save: 50
  final_model_chains_to_save: 20
# Training-loop options.
# NOTE(review): semantics are inferred from key names; confirm with the trainer.
train:
  n_epochs: 300
  batch_size: 256
  save_model: true
  ema_decay: 0.999
  num_workers: 0
# Model options.
# NOTE(review): what each value controls is not visible in this file; confirm
# against the model code before changing any of them.
model:
  n_layers: 12
  lambda_train: [1, 5]
  transition: 'marginal'
  rate_constant: [4.0, 4.0, 1.0]
  diffusion_steps: 1000

  # Corrector settings.
  corrector_entry_time: 0.0
  corrector_num_steps: 10
  corrector_tau_multiplier: 0.1

  # Keys stay quoted on purpose: a bare `y` is a boolean under YAML 1.1 on
  # some parsers, which would silently change the key.
  hidden_mlp_dims:
    'X': 256
    'E': 128
    'y': 64

  hidden_dims:
    dx: 256
    de: 64
    dy: 128
    n_head: 8
    dim_ffX: 256
    dim_ffE: 128
    dim_ffy: 256

# Dataset options.
# NOTE(review): `remove_h` presumably toggles removal of hydrogens; confirm
# against the dataset loader.
dataset:
  remove_h: true
