# Top-level experiment settings.
# NOTE(review): the code that reads these keys is not part of this file; the
# meanings suggested by the key names should be confirmed against the loader.
rl_algorithm: 'sac'
obs_type: 'xpidtf'
# Lowercase `false` is the canonical YAML boolean spelling.
share_encoders: false
# Policy network. `_target_` is a dotted path to the class to construct
# (presumably resolved by a Hydra-style instantiate call — confirm); the other
# keys are handed to that class.
policy:
  _target_: rlkit.torch.sac.policies.gaussian_policy.TanhGaussianPolicy
  # NOTE(review): one entry of 24 here versus two entries of 256 under `qnet`
  # — confirm the much smaller policy size is intentional.
  hidden_sizes: [24]
# Q-network. `_target_` is a dotted path to the class to construct (presumably
# resolved by a Hydra-style instantiate call — confirm); `hidden_sizes` is
# handed to that class.
qnet:
  _target_: rlkit.torch.networks.mlp.FlattenMlp
  hidden_sizes: [256, 256]
# Path collector configured under the evaluation key; `_target_` names the
# class to construct.
eval_path_collector:
  _target_: rlkit.samplers.data_collector.path_collector.BatchTaskEvalCollector
  # `${split_type}` is an interpolation: the value is taken from a `split_type`
  # key that is expected to come from elsewhere in the composed config
  # — TODO confirm where it is defined.
  split: ${split_type}
# Path collector configured under the exploration key; `_target_` names the
# class to construct and no further arguments are set here.
expl_path_collector:
  _target_: rlkit.samplers.data_collector.path_collector.BatchMdpPathCollector
# Replay buffer; `_target_` names the class to construct.
replay_buffer:
  _target_: rlkit.data_management.env_replay_buffer.EnvReplayBuffer
  # Spelled out as a plain integer. The exponent form `1e6` never loads as an
  # int: it becomes a float under YAML 1.2 / OmegaConf-style resolvers and a
  # string under YAML 1.1 loaders, while a buffer capacity should be integral.
  max_replay_buffer_size: 1000000
# SAC trainer; `_target_` names the class to construct and the remaining keys
# are handed to it.
trainer:
  _target_: rlkit.torch.sac.sac.SACTrainer
  discount: 0.95
  # The mantissa carries an explicit decimal point so that YAML 1.1 loaders
  # also resolve these as floats (a bare `3e-4` is read as a string there).
  # The numeric value is unchanged.
  policy_lr: 3.0e-4
  qf_lr: 3.0e-4
# Training-loop settings handed to the class named by `_target_`.
# NOTE(review): the groupings below follow the key names only; the exact
# semantics of each setting are defined by the target class — confirm there.
algorithm:
  _target_: rlkit.torch.torch_rl_algorithm.TorchBatchRLAlgorithm
  # Epoch / loop structure.
  num_epochs: 1000
  num_train_loops_per_epoch: 1
  # Per-train-loop budgets.
  num_expl_steps_per_train_loop: 1000
  num_trains_per_train_loop: 1000
  # Per-epoch evaluation budget.
  num_eval_steps_per_epoch: 10000
  # Batch size and path-length cap.
  batch_size: 256
  max_path_length: 100
