---
  # Rollout recording, logging and load/save locations for this run.
  # NOTE(review): the directories below are absolute, machine-specific paths
  # (/hdd/... and /nfs/...); they will need adjusting on another host.
  record:
    # Alternative NFS destination, kept for reference:
    # record_rollouts: /nfs/data/calebc/object_data/breakout/fixed_limits/ball/
    record_rollouts: /hdd/datasets/object_data/breakout/fixed_limits/ball
    # NOTE(review): -1 looks like a sentinel value -- confirm its meaning
    # against the code that consumes this key.
    record_recycle: -1
    # Relative path (unlike the directories in this section).
    log_filename: logs/temp/ball_train3.log
    # load_dir and save_dir are different directories: testrun vs testrun_ball.
    load_dir: /nfs/data/calebc/object_data/breakout/fixed_limits/testrun
    save_dir: /nfs/data/calebc/object_data/breakout/fixed_limits/testrun_ball
    # Disabled alternative checkpoint location, kept for reference:
    # checkpoint_dir: /nfs/data/calebc/object_data/breakout/testrun/paddle_ball_checkpoint
    save_interval: 100
  # Environment selection: Breakout in its "drop_stopping" variant.
  environment:
    env: Breakout
    variant: drop_stopping
    # Booleans use the canonical lowercase spelling; they load exactly as the
    # capitalised forms do.
    render: true
    fixed_limits: true
  # Device settings.
  # presumably gpu is a device index and no_cuda: false leaves CUDA enabled --
  # TODO confirm against the consumer.
  torch:
    gpu: 0
    no_cuda: false
  # Training schedule and data source.
  train:
    train: true
    pretrain_frames: 10000
    num_steps: 250
    num_frames: 1000
    # Plain scalar containing a space: loads as the single string
    # "Paddle Ball", not as two values.
    train_edge: Paddle Ball
    # NOTE(review): absolute, machine-specific path.
    load_rollouts: /hdd/datasets/object_data/breakout/fixed_limits/temp/
    num_iters: 40000
    batch_size: 128
  # Both sections are present but empty. A bare "key:" already loads as null;
  # spelling it out makes that visible.
  # NOTE(review): if an empty mapping was intended, use {} instead -- confirm
  # against the consumer.
  critic_net: null
  actor_net: null
  # Network architecture and optimizer settings.
  network:
    activation_final: tanh
    scale_logits: 10
    # Space-separated values in one plain scalar load as a single string;
    # presumably the consumer splits it into six layer widths -- TODO confirm.
    hidden_sizes: 128 128 128 128 128 128
    net_type: inexp
    input_expand:
      include_relative: true
    optimizer:
      # Floats are written with a leading zero; the parsed values are unchanged.
      lr: 0.0001
      alt_lr: 0.0001
      eps: 0.00001
      alpha: 0.99
      # Loads as the single string "0.9 0.999", not as a sequence of floats.
      betas: 0.9 0.999
      weight_decay: 0.0
  # Sampling settings.
  # NOTE(review): the meaning of "hist" and of a 0.0 param_recycle is not
  # visible in this file -- see the code that consumes these keys.
  sample:
    sample_type: hist
    param_recycle: 0.0
  # Observation extraction flags.
  # Each value is a space-separated run of 0/1 written as one plain scalar, so
  # YAML loads it as a single string rather than a list.
  # NOTE(review): single_obs_setting has six entries while relative_obs_setting
  # has five -- confirm the differing lengths are intentional.
  extract:
    single_obs_setting: 1 1 0 1 0 0
    relative_obs_setting: 0 0 0 0 0
  # Option termination and reward-shaping settings.
  # Booleans use the canonical lowercase spelling; parsed values are unchanged.
  option:
    term_form: combined
    term_as_done: true
    epsilon_close: 0.5
    param_norm: 1
    # NOTE(review): the *_lambda values differ in sign and by orders of
    # magnitude (-0.1, 200, 1); their roles are not visible here.
    constant_lambda: -0.1
    param_lambda: 200
    inter_lambda: 1
    use_binary: true
    negative_true: true
    temporal_extend: 3
    time_cutoff: 300
    interaction_as_termination: true
  # Data collection and replay buffer settings.
  collect:
    buffer_len: 300000
    test_episode: true
    max_steps: 1000
    # Loads as the single string "0.2 0.4", not as two floats.
    prioritized_replay: 0.2 0.4
    aggregator:
      sum_rewards: false
    # Relative path.
    stream_print_file: logs/breakout/log_dumps/pb_option_stream3.txt
  # Hindsight settings.
  # presumably "her" stands for hindsight experience replay -- TODO confirm.
  hindsight:
    use_her: true
    select_positive: 0.1
    max_hindsight: 20
    interaction_resample: false
    interaction_criteria: 1
    min_replay_len: 3
  # Policy learning settings, with nested logging and learning sub-sections.
  policy:
    lookahead: 5
    learning_type: ddpg
    # Written with a leading zero; the parsed float is unchanged.
    tau: 0.001
    max_critic: 100
    epsilon_random: 0.1
    logging:
      log_interval: 10
      train_log_maxlen: 5
      test_log_maxlen: 50
      initial_trials: 20
      test_trials: 10
      # Loads as the single string "1 300", not as two integers.
      max_terminate_step: 1 300
    learn:
      grad_epoch: 50
      sample_form: merged
      post_random_iters: 200
  # Action handling settings.
  # Booleans use the canonical lowercase spelling; parsed values are unchanged.
  action:
    use_relative_action: true
    relative_action_ratio: 0.15
    round_values: true
  # Inline training settings.
  inline:
    # Relative path to a second YAML config.
    interaction_config: configs/Breakout/fixed_limits/paddle_ball_interaction.yaml
    inpolicy_iters: 10000
    inpolicy_schedule: 300
    inpolicy_times: 4
    policy_intrain_passive: false
    # The next two values are space-separated runs in one plain scalar, so each
    # loads as a single string ("0 200000 100 -1" and "20 20 -1").
    intrain_weighting: 0 200000 100 -1
    policy_inline_iters: 20 20 -1
...