# Top-level experiment switches.
# Name of the RL algorithm this configuration is written for.
rl_algorithm: 'sac'
# Canonical lowercase boolean (was `False`); parses to the same value.
# NOTE(review): presumably controls whether policy and qnet reuse one history
# encoder instead of the two separate encoder blocks below — confirm in the
# consuming code.
share_encoders: false
# Policy network configuration.
# NOTE(review): `_target_` looks like a Hydra-style import path of the class to
# instantiate, with sibling keys passed as constructor arguments — confirm.
policy:
  _target_: rlkit.torch.sac.policies.sequential_gaussian_policy.SequentialGaussianPolicy
  decoder_hidden_sizes: [256, 256]
  # Same value as encoder.obs_encode_dim below (both 32); presumably the two
  # must be kept in sync — TODO confirm.
  obs_encode_dim: 32
  # Nested history encoder (GRU variant) for the policy.
  encoder:
    _target_: rlkit.torch.networks.history_encoders.rnn_encoder.RNNEncoder
    rnn_type: 'gru'
    rnn_hidden_size: 128
    obs_encode_dim: 32
    act_encode_dim: 16
    rew_encode_dim: 16
# Q-network configuration.
# NOTE(review): `_target_` looks like a Hydra-style import path of the class to
# instantiate, with sibling keys passed as constructor arguments — confirm.
qnet:
  _target_: rlkit.torch.networks.mlp.SequentialMlp
  hidden_sizes: [256, 256]
  # The Q-network declares its own encoder block; its settings are identical
  # to the ones listed under policy.encoder.
  encoder:
    _target_: rlkit.torch.networks.history_encoders.rnn_encoder.RNNEncoder
    rnn_type: 'gru'
    rnn_hidden_size: 128
    obs_encode_dim: 32
    act_encode_dim: 16
    rew_encode_dim: 16
  # NOTE(review): presumably the width of a joint observation-action
  # embedding fed to the MLP — verify against SequentialMlp.
  obs_act_encode_dim: 64
# Path collectors for evaluation and exploration. Both entries point at the
# same MdpPathCollector class and set no further arguments here.
# NOTE(review): remaining constructor arguments (environment, policy, ...) are
# presumably supplied by the launching code — confirm.
eval_path_collector:
  _target_: rlkit.samplers.data_collector.path_collector.MdpPathCollector
expl_path_collector:
  _target_: rlkit.samplers.data_collector.path_collector.MdpPathCollector
# Sequence replay buffer configuration.
replay_buffer:
  _target_: rlkit.data_management.naive_sequence_replay_buffer.NaiveSequenceReplayBuffer
  # Written as a plain integer on purpose. The previous spelling `1e6` is
  # resolved as a float by OmegaConf and as a *string* by YAML 1.1 loaders such
  # as PyYAML (which require a dot in floats); a buffer capacity is a count, so
  # it should reach the constructor as an int.
  max_replay_buffer_size: 1000000
  # NOTE(review): presumably the length of each sampled sub-sequence — confirm
  # against NaiveSequenceReplayBuffer.
  batch_window_size: 64
# SAC trainer hyperparameters.
trainer:
  _target_: rlkit.torch.sac.sequential_sac.SequentialSACTrainer
  discount: 0.99
  # Learning rates are written with an explicit mantissa dot (`3.0e-4`, not
  # `3e-4`). YAML 1.1 loaders such as PyYAML only recognise exponent floats
  # that contain a dot and would otherwise load the value as the string
  # "3e-4". The numeric value is unchanged.
  policy_lr: 3.0e-4
  qf_lr: 3.0e-4
  soft_target_tau: 0.005
# Outer training-loop schedule.
# NOTE(review): the meaning of each key is defined by TorchBatchRLAlgorithm,
# which is not visible here; the remarks below only state what the numbers
# themselves show.
algorithm:
  _target_: rlkit.torch.torch_rl_algorithm.TorchBatchRLAlgorithm
  num_epochs: 1500
  batch_size: 64
  max_path_length: 1000
  # 10000 = 10 x max_path_length.
  num_eval_steps_per_epoch: 10000
  # Equal to max_path_length (1000).
  num_expl_steps_per_train_loop: 1000
  num_trains_per_train_loop: 1000
  # 10000 = 10 x max_path_length.
  min_num_steps_before_training: 10000
  num_train_loops_per_epoch: 1
