---
  # Log, load/save and graph locations plus recording options for this run.
  # NOTE(review): the run suffix "6" is repeated in log_filename, save_dir and
  # record_graphs (and in collect.stream_print_file); presumably these are
  # meant to be bumped together when starting a new run - confirm.
  record:
    # Alternative rollout-recording targets, currently disabled:
    # record_rollouts: /nfs/data/calebc/object_data/breakout/testrun/paddle/
    # record_rollouts: /hdd/datasets/object_data/robopushing/testrun/block
    # NOTE(review): -1 looks like a "disabled" sentinel - confirm with consumer.
    record_recycle: -1
    log_filename: logs/temp/block_train6.log
    load_dir: /nfs/data/calebc/object_data/robopushing/testrun/
    save_dir: /nfs/data/calebc/object_data/robopushing/testrun_block6/
    record_graphs: /nfs/data/calebc/object_data/robopushing/testrun_graphs6/
    # Checkpoint / pretrain locations, currently disabled:
    # checkpoint_dir: /hdd/datasets/object_data/robopushing/testrun/action_gripper_checkpoint
    # pretrain_dir: /hdd/datasets/object_data/robopushing/testrun/action_gripper_checkpoint
    presave_graph: true
    save_interval: 100
  # Environment selection for this run.
  environment:
    env: RoboPushing
    variant: default
    # NOTE(review): presumably the episode length in environment steps - confirm.
    horizon: 300
  # Device selection.
  torch:
    # NOTE(review): presumably a CUDA device index rather than a GPU count -
    # confirm against the code that reads this value.
    gpu: 1
    no_cuda: false
  # Training-loop settings.
  train:
    train: true
    pretrain_frames: 10000
    num_steps: 100
    num_frames: 1000
    # Single plain string containing a space ("Gripper Block"), not a list.
    train_edge: Gripper Block
    # NOTE(review): this path is under /hdd while record.load_dir is under
    # /nfs - confirm both mounts exist on the machine running this config.
    load_rollouts: /hdd/datasets/object_data/robopushing/testrun/random
    num_iters: 10000
    batch_size: 256
  # Both keys are intentionally null (written explicitly so an empty value is
  # not mistaken for a forgotten nested mapping); no per-network settings are
  # given in this file.
  # NOTE(review): presumably the consumer falls back to the shared `network`
  # section when these are null - confirm.
  critic_net: null
  actor_net: null
  # Network architecture and optimizer hyperparameters.
  network:
    # A single string of space-separated integers, not a YAML sequence; quoted
    # to make the string type explicit. Presumably split by the consumer -
    # confirm before converting to a list.
    hidden_sizes: '512 512 512 512'
    net_type: basic
    activation_final: tanh
    scale_logits: 1
    optimizer:
      # Floats are written with a leading zero in plain decimal form; avoid
      # exponent notation such as 1e-5, which some YAML 1.1 loaders read as a
      # string.
      lr: 0.0005
      alt_lr: 0.0001
      eps: 0.00001
      alpha: 0.99
      # String holding two space-separated floats, same form as hidden_sizes.
      betas: '0.9 0.999'
      weight_decay: 0.0001
  # Sampling settings.
  sample:
    sample_type: cent
    sample_distance: 0.25
    # Optional settings, currently disabled:
    # sample_schedule: 200
    # param_recycle: 0.1
  # Observation-extraction flags. Each value is one string of space-separated
  # 0/1 entries (quoted to make the string type explicit), not a YAML sequence.
  # NOTE(review): the two strings have different lengths (6 vs 5 entries);
  # presumably each position toggles one observation component - confirm the
  # expected counts with the consumer.
  extract:
    single_obs_setting: '1 1 0 1 0 0'
    relative_obs_setting: '0 0 0 0 1'
  # Option termination / reward settings.
  option:
    term_form: param
    term_as_done: true
    epsilon_close: 0.007
    param_norm: 3
    # NOTE(review): the three *_lambda values look like weights on separate
    # reward terms - confirm their meaning and sign convention.
    constant_lambda: -1
    param_lambda: 1
    inter_lambda: 0
    temporal_extend: 3
    time_cutoff: 50
    aggregator:
      sum_rewards: false
  # Data-collection settings.
  collect:
    # Key written without a space before the colon, matching every other key
    # in this file (the parsed key name is unchanged: "buffer_len").
    buffer_len: 100000
    # Optional setting, currently disabled:
    # prioritized_replay: 0.3 0.4
    test_episode: true
    max_steps: 1000
    # Carries the same run suffix ("6") as the paths in the record section.
    stream_print_file: logs/robo/gb_option_stream6.txt
  # Hindsight settings.
  hindsight:
    use_her: true
    # NOTE(review): integer-valued here (1, 2); whether these act as flags,
    # modes or counts is not visible from this file - confirm.
    early_stopping: 1
    interaction_criteria: 2
    # Optional setting, currently disabled:
    # select_positive: 0.3
  # Policy learning settings, grouped into primacy, logging and learn
  # sub-sections.
  policy:
    learning_type: ddpg
    epsilon_random: 0.01
    lookahead: 10
    # NOTE(review): presumably periodic layer resets during training - confirm
    # the units (iterations vs. frames) of the frequency and stop values.
    primacy:
      reset_layers: 4
      reset_frequency: 1000
      primacy_iters: 50
      stop_resets: 5000
    logging:
      log_interval: 100
      train_log_maxlen: 30
      test_log_maxlen: 50
      initial_trials: 20
      test_trials: 10
      # One string containing two space-separated integers (quoted to make the
      # string type explicit), not a YAML sequence.
      max_terminate_step: '1 30'
    learn:
      post_random_iters: 250
      grad_epoch: 50
      sample_form: merged
  # Action-space settings.
  action:
    use_relative_action: true
    relative_action_ratio: 0.15
...