# Default hyperparameter sets, keyed first by agent name and then by setting name.
# The "net_*" entries are layer specifications: lists of (in, out) size pairs
# interleaved with string tags. How None sizes and the "R" tag are resolved is up
# to the consumer of this table (presumably inferred dimensions and ReLU -- TODO confirm).
default_hyperparameters = {
    "pets": {
        "net_model": [(None, 32), "R", (32, 64), "R", (64, None)],
        # If True, dynamics models output standard deviations as well as means.
        "probabilistic": True,
        # Capacity of the random replay memory; random mode is disabled once it is full.
        "num_random_steps": 2000,
        "batch_size": 256,
        # Steps between consecutive model updates.
        "model_freq": 10,
        # Dynamics model learning rate.
        "lr_model": 1e-3,
        # Capacity of the replay memory; the oldest entries are overwritten once it is full.
        "replay_capacity": 2000,
        # Proportion of on-policy transitions.
        "batch_ratio": 0.9,
        # Rounds of distribution refinement performed during planning.
        "cem_iterations": 5,
        "cem_particles": 50,
        "cem_elites": 10,
        # Update rate of the CEM sampling distribution.
        "cem_alpha": 0.1,
        # Sharpness of the elite weighting used by the MPPI extension.
        "cem_temperature": 0.5,
        # Discount factor.
        "gamma": 0.99,
        "rollout": {
            "horizon_params": ("constant", 20),
        },
    },
    "sac": {
        "net_pi": [(None, 256), "R", (256, 256), "R", (256, None)],
        "net_Q": [(None, 256), "R", (256, 256), "R", (256, None)],
        # Use "box_bounds" to pre-normalise network inputs; None leaves this unset.
        "input_normaliser": None,
        # Capacity of the replay memory; the oldest entries are overwritten once it is full.
        "replay_capacity": 10000,
        # Number of transitions sampled from replay memory per learning batch.
        "batch_size": 256,
        # Policy learning rate.
        "lr_pi": 1e-4,
        # State-action value function learning rate.
        "lr_Q": 1e-3,
        # Discount factor.
        "gamma": 0.99,
        # Weight of the entropy regularisation term.
        "alpha": 0.2,
        # Polyak averaging coefficient for the target network parameters.
        "tau": 0.005,
        # Timesteps between consecutive updates.
        "update_freq": 1,
    },
}