# Environment-related settings.
# NOTE(review): `${overrides.*}` and `${env.*}` are interpolations that point at
# config nodes outside this file; presumably this file is composed under an
# `overrides` package -- confirm against the root config.
env: "walker2d"
classdef: null              # The path to the environment class (unset here)
d4rl_config: ???            # Must be supplied, no default. E.g., "medium-expert-v2"
save_dir: ${overrides.d4rl_config}
term_func: ${env.term_func} # Termination function of the environment
reward_func: ${env.reward_func}
obs_preproc: ${env.obs_preproc} # Observation pre-processing, taken from the env config
obs_postproc: ${env.obs_postproc}
targ_proc: ${env.targ_proc}

# Written with an explicit mantissa/sign so it is a float (2000000.0) under every
# YAML loader; `2e6` is only a float under OmegaConf's loader.
# NOTE(review): the value is a float, not an int -- confirm the consumer casts it.
replay_buffer_size: 2.0e+6
collector_type: "step"        # 'step' -- one sample at a time, 'path' -- a path at a time
model_dir: null             # Root directory in which to look for state_dicts (unset here)

indep_var: null
exp_name: null

# Dynamics-model settings. Each `_target_` is the dotted path of the class to
# build (presumably via Hydra instantiation -- confirm against the caller).
# `???` marks values that have no default here and must be supplied elsewhere.
dynamics:
  _target_: rlkit.torch.models.dynamics_models.model.DynamicsModel
  ensemble_model:
    _target_: rlkit.torch.models.probabilistic_ensemble.ProbabilisticEnsemble    # MBOP originally uses a deterministic ensemble
    ensemble_size: 7
    layer_size: 200
    num_hidden_layer: 4
    activation_func: torch.nn.functional.silu         # Elu can also be an option (by MBOP author)
    use_decay: true
  learn_reward: true                  # Whether to learn the reward model as well
  separate_reward_func: ???           # Whether to instantiate a separate module for the reward function
  learn_logstd_min_max: true          # Enabled because the ensemble above is probabilistic; would be false for a deterministic ensemble
  learning_rate: 1.0e-3               # Explicit mantissa so it is a float under every YAML loader
  batch_size: 256
  max_epochs_since_last_update: 5
  num_model_learning_epochs: null     # Unset here
  num_elites: 5
  normalize_inputs: true
  normalize_outputs: ???
