# Top-level settings for the "mvepo" algorithm config. Values written as
# ${overrides.*} are interpolations resolved from the `overrides` config node.
name: "mvepo"
replay_buffer_size: "${overrides.replay_buffer_size}"
collector_type: "path"
model_dir: "${overrides.model_dir}"

# Expected values: true / false. Left null here, so no value is set by this
# file. NOTE(review): confirm how the consumer treats null.
eval_transition_model: null

# Settings for the SAC agent. `_target_` follows the Hydra instantiate
# convention: it names the class to construct, and the remaining keys are
# passed as its arguments.
agent:
  _target_: rlkit.examples.algorithms.actor_critic.wrapper.SACAgent
  # `???` is the OmegaConf mandatory-value marker: both dimensions must be
  # filled in before the config is read.
  obs_dim: ???
  action_dim: ???
  layer_size: 256
  num_hidden_layer: 3
  num_qfs: "${overrides.trainer_cfg.num_qfs}"
  # No initialisation method is selected here (explicit null).
  # NOTE(review): confirm which default the agent applies for null.
  w_init_method: null

# Dynamics-model settings. Every value except `reward_propagation_method` is
# an interpolation that pulls the matching entry from the `overrides` node.
dynamics:
  _target_: "${overrides.dynamics._target_}"
  ensemble_model: "${overrides.dynamics.ensemble_model}"
  learn_reward: "${overrides.dynamics.learn_reward}"
  separate_reward_func: "${overrides.dynamics.separate_reward_func}"
  learn_logstd_min_max: "${overrides.dynamics.learn_logstd_min_max}"
  num_elites: "${overrides.dynamics.num_elites}"
  normalize_inputs: "${overrides.dynamics.normalize_inputs}"
  normalize_outputs: "${overrides.dynamics.normalize_outputs}"
  clip_obs: "${overrides.dynamics.clip_obs}"
  # Read from the root of `overrides`, not from `overrides.dynamics`.
  use_true_reward: "${overrides.use_true_reward}"
  reward_propagation_method: "all"

# Training-loop settings.
# Note: no policy training should occur for MVE(lambda); pre-trained models
# should be loaded.
algorithm_cfg:
  batch_size: 256
  max_path_length: 1000                   # Maximum path length
  num_epochs: 400                         # Main training epochs of the RL algorithm
  num_eval_steps_per_epoch: 5000
  num_expl_steps_per_train_loop: 1000     # Exploration steps per train loop (an epoch can have several loops)
  num_trains_per_train_loop: 1000         # Trains per exploration step = this value // num_expl_steps_per_train_loop
  num_train_loops_per_epoch: 1            # Train loops within each epoch (default=1)
  min_num_steps_before_training: 5000     # Initial exploration with random policy
  max_epochs_since_last_update: 5
  num_model_trains_per_train_loop: 1
  save_snapshot_gap: 5
  save_best_parameters: true
  rand_init_policy_before_training: true  # Use the random policy for initial exploration
  timeout: null                           # No timeout value is set here

# Dotted paths to the two `prepare_*` entry points in the mvepo module.
# NOTE(review): presumably imported by the launcher to build the
# models/trainers and the RL algorithm; confirm against the caller.
get_module_path: "rlkit.examples.algorithms.actor_critic.mvepo.prepare_models_and_trainers"
get_algo_path: "rlkit.examples.algorithms.actor_critic.mvepo.prepare_generic_rl_algorithm"
