# --- Anakin config ---

# --- Training ---
seed: 0  # RNG seed.
# Total number of vectorised environments.
# If using a single seed, this needs to be divisible by the number of devices.
# The code creates another arch.num_envs, which is the number of envs per device.
# If using multiple seeds, this defines the num_envs per seed (each seed on a
# different device).
total_num_envs: 1024
# Total number of environment steps.
# If unspecified, it is derived from num_updates; otherwise, num_updates is
# adjusted based on this value.
# Written with an explicit decimal point and signed exponent so that YAML 1.1
# loaders also resolve it as a float rather than as the string "1e9".
total_timesteps: 1.0e+9
# Number of updates. Left null here; per the note on total_timesteps, it is
# adjusted from that value when total_timesteps is set.
num_updates: null

# --- Evaluation ---
# Evaluate the policy greedily. If true, the policy selects the action that
# corresponds to the greatest logit. If false, the policy samples from the
# logits.
evaluation_greedy: false
num_eval_episodes: 128  # Number of episodes to evaluate per evaluation.
num_evaluation: 10  # Number of evenly spaced evaluations to perform during training.
# Whether the absolute metric should be computed. For more details on the
# absolute metric, please see: https://arxiv.org/abs/2209.10485
absolute_metric: true
