---
# Filesystem paths (all absolute) plus the record_recycle option.
record:
  record_rollouts: /hdd/datasets/hype/robopushing/Gripper/
  # NOTE(review): -1 looks like a sentinel value; confirm its meaning with
  # the code that reads this option.
  record_recycle: -1
  # load_dir and save_dir point at the same directory.
  load_dir: /hdd/datasets/hype/robopushing/testrun/
  save_dir: /hdd/datasets/hype/robopushing/testrun/
# Environment selection: environment name and its variant.
environment:
  env: RoboPushing
  variant: discrete
arg_dict: hype
# NOTE(review): the previous comment here read "either reward or policy",
# but the value is "skill" and this file defines "reward:" and "skill:"
# sections; confirm the accepted modes.
train_mode: skill
# Plain scalar containing a space: loaded as the single string
# "Action Gripper".
train_edge: Action Gripper
# Reward settings.
reward:
  reward_base: -1
  param_reward: 1
  # Booleans use the canonical lowercase form (true/false).
  one_mode: false
# Skill settings.
skill:
  temporal_extend: 2
  policy_iters: 5
  policy_iters_schedule: 700
  num_repeats: 3
  epsilon_random: 0.1
  num_networks: 1
  learning_type: dqn
  buffer_len: 10000
  num_iters: 1000
  # Plain scalar with spaces: loaded as the single string "0 1 0", not as a
  # sequence of three integers.
  obs_components: 0 1 0
  learn:
    grad_epoch: 10
    discount_factor: 0.0
  # Disabled alternative (learning_type: cmaes). The lines below are
  # comments only; uncommenting them as-is would duplicate keys already set
  # above, so replace the active values instead of adding these.
  # epsilon_random: 0.0
  # num_networks: 10
  # learning_type: cmaes
  # learn:
  #   grad_epoch: 1
  #   discount_factor: 0.0
# No settings are given for either network section; the empty values are
# written as explicit nulls.
critic_net: null
actor_net: null
# Network and optimizer settings.
network:
  hidden_sizes: 16
  net_type: mlp
  # activation_final: tanh
  # scale_logits: 1
  optimizer:
    # Floats are written as plain decimals with a leading zero, matching the
    # other float values in this file.
    lr: 0.001
    alt_lr: 0.0001
    eps: 0.00001
    alpha: 0.99
    # Plain scalar with a space: loaded as the single string "0.9 0.999",
    # not as a sequence of two floats.
    betas: 0.9 0.999
    weight_decay: 0.0
...