---
  # Settings grouped under `record`. Only `record_recycle` is active; every
  # other key in this section is commented out.
  record:
    # record_rollouts: ""
    # NOTE(review): -1 looks like a "disabled"/"unlimited" sentinel — confirm
    # against the code that reads this key.
    record_recycle: -1
    # wandb_log: "NCD_box2d"
    # log_filename: /nfs/data//object_data/all/randomdist/default/attn/log/inter.log
    # load_dir: /nfs/data//object_data/all/pusher2d/attn/testrun/
    # save_dir: /nfs/data//object_data/all/randomdist/attn/default/
    # load_intermediate: /hdd/datasets/ac_data/jaci/1innrp
    # save_intermediate: /hdd/datasets/ac_data/jaci/1innrp
  # Environment selection: the `env` name plus a `variant` of it. Both
  # `load_environment` overrides below are commented out.
  environment:
    env: Box2D
    variant: default
    # load_environment: /work/pi__umass_edu//object_data/pusher2d/tiny/
    # load_environment: /data//object_data/pusher2d/tiny/
  # Settings grouped under `torch`.
  torch:
    # presumably the index of the GPU to use — confirm with the consumer
    gpu: 0
    # Boolean in canonical lowercase form; YAML 1.1 and 1.2 loaders both
    # resolve it to false.
    no_cuda: false
  # Settings grouped under `pretrain`.
  pretrain:
    # presumably the number of pretraining iterations — confirm with the
    # consumer; note that `train` has its own, separate `num_iters`.
    num_iters: 50000
  # Settings grouped under `train`.
  train:
    # Boolean in canonical lowercase form; YAML 1.1 and 1.2 loaders both
    # resolve it to true.
    train: true
    num_frames: 10000000
    train_test_order: time
    # Alternative rollout locations, currently disabled:
    # load_rollouts: /work/pi__umass_edu//object_data/pusher2d/tiny/
    # load_rollouts: /data//object_data/box2d/default/
    # NOTE(review): the active path contains an empty segment (`//`). POSIX
    # path resolution normally collapses it, but confirm it is intentional.
    load_rollouts: /datastor1//object_data/box2d/default/
    num_iters: 500000
    batch_size: 512
    log_interval: 1000
    param_update_frequency: 10000
  # Settings grouped under `inter` (presumably the interaction model —
  # confirm with the consumer).
  inter:
    train_names: Target
    # Values made of several space-separated tokens (here and in
    # `passive_weighting`) are plain scalars: YAML loads each as ONE string,
    # not a list. Presumably the consumer splits them — verify.
    pretrain_forms: single_passive full
    train_forms: mask_both
    # Booleans are written in canonical lowercase form; YAML 1.1 and 1.2
    # loaders both resolve them to true/false.
    predict_dynamics: true
    passive_weighting: -2 100000
    weighting_type: passive_error
    use_active_as_passive: false
    # Nested mapping; distinct from the top-level `masking` section.
    masking:
      masking_form: mixed
  # Settings grouped under `active`.
  active:
    # Plain scalar with a space: YAML loads this as the single string "5 -1",
    # not a list. Presumably the consumer splits it — verify.
    weighting: 5 -1
    interaction_schedule: 30000
  # Settings grouped under `infer`. `infer_names` carries the same value as
  # `inter.train_names` (Target).
  infer:
    infer_types: soft
    infer_interval: 1000
    infer_names: Target
    # Two separate selectors: one `*_weight_infer` key for training and one
    # for evaluation.
    train_weight_infer: sample_active_weights
    eval_weight_infer: trace_weights
  # Top-level `masking` section; distinct from the nested `inter.masking`.
  masking:
    # Both active values are plain scalars containing a space, so YAML loads
    # them as single strings ("10 -1", "0.3 -1"), not lists. Presumably the
    # consumer splits them — verify.
    weighting: 10 -1
    lasso: 0.3 -1
    # The adaptive-lasso options below are commented out.
    # adaptive_lasso: 2 -1
    # adaptive_lasso_bias: 2 1
  # Network settings grouped under `interaction_net`, with nested sections
  # `factor_net`, `mask_attn` and `optimizer`.
  # Booleans throughout are written in canonical lowercase form; YAML 1.1 and
  # 1.2 loaders both resolve them to true/false.
  interaction_net:
    # Plain scalar with spaces: YAML loads this as ONE string, not a list of
    # integers. Presumably the consumer splits it — verify.
    hidden_sizes: 512 512 512 512
    net_type: flatpair
    init_form: xnorm
    # embed_dim: 512
    activation: leakyrelu
    factor_net:
      repeat_layers: false
      reduce_function: sum
      num_pair_layers: 1
      append_mask: true
      # Note: `mask_attn.append_keys` below is set to the opposite value.
      append_keys: false
      # final_layers: 512
    mask_attn:
      model_dim: 256
      num_heads: 16
      num_layers: 2
      cluster: false
      num_clusters: 0
      attention_dropout: 0.1
      merge_function: cat
      mask_mode: query
      # NOTE(review): -1 looks like a "disabled" sentinel — confirm against
      # the code that reads this key.
      gumbel_attention: -1
      append_keys: true
      no_hidden: true
    optimizer:
      # Floats carry an explicit leading zero, matching `weight_decay` and
      # `attention_dropout`; the loaded values are unchanged.
      lr: 0.0005
      alt_lr: 0.0005
      eps: 0.00001
      alpha: 0.99
      # Plain scalar with a space: loads as the string "0.9 0.999", not a
      # pair of floats. Presumably split by the consumer — verify.
      betas: 0.9 0.999
      weight_decay: 0.00001
...