---
# Recording, logging and load/save locations for this run.
# NOTE(review): record_rollouts points under testrun/ while load_dir points
# at testrun2/ — confirm the mismatch is intentional.
record:
  # record_rollouts: /nfs/data/calebc/object_data/breakout/testrun/paddle/
  record_rollouts: '/hdd/datasets/object_data/breakout/testrun/paddle'
  # presumably -1 is a sentinel (e.g. "no recycling") — TODO confirm
  record_recycle: -1
  log_filename: 'logs/temp/paddle_train.log'
  # NOTE(review): trailing slash here but not on record_rollouts — verify
  # the consumer joins paths in a way that tolerates both forms.
  load_dir: '/hdd/datasets/object_data/breakout/testrun2/'
  # save_dir: /hdd/datasets/object_data/breakout/testrun2/
  # checkpoint_dir: /nfs/data/calebc/object_data/breakout/testrun/action_paddle_checkpoint
  # load_checkpoint: /nfs/data/calebc/object_data/breakout/testrun_inline/action_paddle_checkpoint
  save_interval: 100
# Environment selection.
# NOTE(review): `variant` presumably names a variation of the `env`
# environment — verify the accepted values against the consumer.
environment:
  env: Breakout
  variant: drop_stopping
# Device settings.
# NOTE(review): presumably `gpu` is a device index and `no_cuda` forces CPU
# execution — confirm against the consumer.
torch:
  gpu: 0
  # Canonical lowercase boolean; parses to the same value as `False`.
  no_cuda: false
# Training run settings.
# NOTE(review): units of the *_frames / num_* counters are not visible from
# this file — confirm against the consumer before tuning.
train:
  # Canonical lowercase boolean; parses to the same value as `True`.
  train: true
  pretrain_frames: 2000
  num_steps: 90
  num_frames: 1000
  # Quoted because the value contains a space; the parsed string is unchanged.
  train_edge: 'Action Paddle'
  load_rollouts: '/hdd/datasets/object_data/breakout/random2'
  num_iters: 200
  batch_size: 64
# Left empty: explicit null replaces the bare key (same parsed value).
# NOTE(review): if the consumer expects a mapping here, use {} — confirm.
critic_net: null
# Left empty: explicit null replaces the bare key (same parsed value).
# NOTE(review): if the consumer expects a mapping here, use {} — confirm.
actor_net: null
# Network architecture and optimizer hyperparameters.
network:
  # Space-separated list stored as a single string (quoted to make the
  # string type explicit). Presumably split by the consumer — verify.
  hidden_sizes: '128 128'
  net_type: basic
  activation_final: tanh
  scale_logits: 30
  optimizer:
    # Floats written with a leading zero; parsed values are unchanged.
    lr: 0.0007
    alt_lr: 0.0001
    eps: 0.00001
    alpha: 0.99
    # Space-separated pair stored as a single string, like hidden_sizes.
    betas: '0.9 0.999'
    weight_decay: 0.0
# Sampling settings.
# NOTE(review): the meaning of `cent` and the units of sample_distance /
# sample_schedule are not visible from this file — confirm with the consumer.
sample:
  sample_type: cent
  sample_distance: 0.3
  sample_schedule: 200
  param_recycle: 3
# Observation extraction settings.
# Both values are space-separated 0/1 sequences stored as single strings;
# quoted so the string type is explicit. Presumably per-component on/off
# flags split by the consumer — TODO confirm the meaning of each position.
extract:
  single_obs_setting: '1 0 0 1 0 0'
  relative_obs_setting: '0 0 0 0 1'
# Option settings (termination and lambda weights).
# NOTE(review): the semantics of the *_lambda weights and of the -1 value on
# constant_lambda are not visible from this file — confirm with the consumer.
option:
  term_form: combined
  # Canonical lowercase boolean; parses to the same value as `True`.
  term_as_done: true
  epsilon_close: 1
  param_norm: 1
  constant_lambda: -1
  param_lambda: 1
  inter_lambda: 0
  temporal_extend: 20
  time_cutoff: 30
# Data collection settings.
collect:
  # prioritized_replay: 0.4 0.4
  # Space before the colon removed; the key is still `buffer_len`.
  buffer_len: 100000
  # Canonical lowercase boolean; parses to the same value as `True`.
  test_episode: true
  max_steps: 1000
  stream_print_file: 'logs/breakout/ap_option_stream.txt'
# Hindsight settings.
# NOTE(review): `use_her` presumably toggles hindsight experience replay —
# confirm against the consumer.
hindsight:
  # Canonical lowercase boolean; parses to the same value as `True`.
  use_her: true
# Policy, logging and learning-loop settings.
policy:
  epsilon_random: 0.3
  # learning_type: rainbow
  # max_min_critic: -10 0
  # tau: 1000
  logging:
    log_interval: 10
    train_log_maxlen: 5
    test_log_maxlen: 50
    initial_trials: 20
    test_trials: 10
    # Space-separated pair stored as a single string (quoted to make the
    # string type explicit). Presumably split by the consumer — verify.
    max_terminate_step: '1 30'
  learn:
    grad_epoch: 200
    sample_form: merged
...