---
  # Recording / logging options. Every path-valued option in this section is
  # commented out, so record_recycle is the only setting that is active.
  record:
    # record_rollouts: ""
    # NOTE(review): -1 looks like a "disabled" sentinel -- confirm against the
    # code that reads record_recycle.
    record_recycle: -1
    # Disabled log/load/save locations, kept for reference:
    # log_filename: /nfs/data//object_data/all/randomdist/default/attn/log/inter.log
    # load_dir: /nfs/data//object_data/all/breakout/attn/testrun/
    # save_dir: /nfs/data//object_data/all/randomdist/attn/default/
    # load_intermediate: /hdd/datasets/ac_data/null/1innull
    # save_intermediate: /hdd/datasets/ac_data/null/1innull
  # Environment selection and the directory its stored data is loaded from.
  environment:
    env: RandomDAG
    variant: 1-in-n
    # Disabled alternative path (note it ends in 1_in/, not 1_in_n/):
    # load_environment: /work/pi__umass_edu//object_data/RandomDAG/1_in/
    # Active path. It is identical to train.load_rollouts in this file;
    # NOTE(review): presumably both must be changed together when the data
    # moves -- confirm.
    load_environment: /hdd/datasets/object_data/RandomDAG/1_in_n/
  # Compute-device settings.
  torch:
    # NOTE(review): presumably the CUDA device index -- confirm with the consumer.
    gpu: 0
    # Boolean written in canonical lowercase so every YAML schema resolves it
    # to a boolean.
    no_cuda: false
  # Pretraining phase settings.
  pretrain:
    # Iteration count for this phase; train.num_iters is set separately
    # (10000 in this file).
    num_iters: 100000
  # Main training phase settings.
  train:
    # Boolean written in canonical lowercase so every YAML schema resolves it
    # to a boolean.
    train: true
    num_frames: 500000
    train_test_order: time
    # Disabled alternative path (note it ends in 1_in/, not 1_in_n/):
    # load_rollouts: /work/pi__umass_edu//object_data/RandomDAG/1_in/
    # Active path; identical to environment.load_environment in this file.
    load_rollouts: /hdd/datasets/object_data/RandomDAG/1_in_n/
    # Separate from pretrain.num_iters (100000 in this file).
    num_iters: 10000
    batch_size: 512
    # Same value as infer.infer_interval in this file.
    log_interval: 1000
  # Settings for the multi_inter component; the evaluate switch is disabled.
  multi_inter:
    # evaluate: True
    max_combination: 1
    # NOTE(review): the meaning and valid range of dist_epsilon are not visible
    # in this file -- confirm against the consumer before tuning.
    dist_epsilon: 0.1
  # Settings for the inter component.
  inter:
    # Same value as infer.infer_names in this file. The stray trailing space
    # after the value has been removed; YAML strips it from plain scalars, so
    # the loaded string is unchanged.
    train_names: $C
    # The two *_forms values are plain scalars, so YAML loads each as ONE
    # string. NOTE(review): presumably split on whitespace by the consumer --
    # confirm.
    pretrain_forms: single_passive full
    train_forms: mask inter
    # Boolean written in canonical lowercase.
    predict_dynamics: true
    # use_active_as_passive: True
  # Masking settings. Both values contain a space, so YAML loads each as a
  # single string ("3 1" and "-1 -1"), not as a list of numbers.
  # NOTE(review): presumably the consumer splits them on whitespace -- confirm
  # before converting them to YAML sequences.
  masking:
    adaptive_lasso: 3 1
    adaptive_lasso_bias: -1 -1
  # Inference settings.
  infer:
    # Plain scalar with spaces: YAML loads this as one string.
    # NOTE(review): presumably split on whitespace by the consumer -- confirm.
    infer_types: soft hard mixed nulls
    # Same value as train.log_interval in this file.
    infer_interval: 1000
    # Same value as inter.train_names in this file.
    infer_names: $C
  # Network settings. The scalar keys come first, followed by the nested
  # factor_net, mask_attn and optimizer mappings.
  network:
    # hidden_sizes, embed_dim and mask_attn.model_dim are all 256 in this file.
    # NOTE(review): presumably they need to stay equal -- confirm before
    # changing one of them alone.
    hidden_sizes: 256
    net_type: parattn
    init_form: xnorm
    embed_dim: 256
    activation: leakyrelu
    # Settings for the factor_net sub-network; final_layers is disabled.
    factor_net:
      # Boolean written in canonical lowercase.
      repeat_layers: true
      # final_layers: 512
    # Settings for the mask_attn sub-network. Booleans are written in
    # canonical lowercase.
    mask_attn:
      # Same value as network.hidden_sizes and network.embed_dim in this file.
      model_dim: 256
      num_heads: 16
      num_layers: 3
      # Clustering is switched off, and num_clusters is 0 to match.
      cluster: false
      num_clusters: 0
      attention_dropout: 0.1
      merge_function: sum
      mask_mode: query
      # NOTE(review): -1 looks like a "disabled" sentinel -- confirm against
      # the code that reads gumbel_attention.
      gumbel_attention: -1
      append_keys: true
      no_hidden: true
    # Optimizer hyperparameters. Floats carry an explicit leading zero; the
    # parsed values are the same as the leading-dot spellings.
    # NOTE(review): which optimizer consumes alpha versus betas is not visible
    # in this file -- confirm against the consumer.
    optimizer:
      lr: 0.0001
      alt_lr: 0.00002
      eps: 0.00001
      alpha: 0.99
      # Plain scalar with a space: YAML loads this as the single string
      # "0.9 0.999", not as two floats.
      betas: 0.9 0.999
      weight_decay: 0.0
...