# Hydra-style defaults list. `_self_` comes first, so (assuming standard
# Hydra composition order) the selected `env` and `algorithm` group configs
# are merged after this file and may override the values defined below.
# NOTE(review): confirm that this override direction is intended.
defaults:
  - _self_
  - env: "dag_gfn"
  - algorithm: "db"

# Replay buffer settings
replay:
  # Dotted import path of the replay buffer class (presumably resolved through
  # Hydra's `instantiate` via the `_target_` key; confirm against the caller)
  _target_: "gfn_maxent_rl.data.ReplayBuffer"

  # Capacity of the replay buffer
  capacity: 100_000

# Number of parallel environments
num_envs: 8

# Wrapper configuration for the environment (presumably applied around each
# environment instance; confirm against the code that reads this key).
# NOTE(review): this targets `RewardCorrection`, while the top-level
# `reward_correction` flag at the end of this file is `false`. Confirm how
# the two settings interact.
env_wrapper:
  _target_: "gfn_maxent_rl.envs.wrappers.RewardCorrection"
  # Extra argument listed next to `_target_`; its meaning is defined by
  # `RewardCorrection` (TODO confirm)
  alpha: 1.0

# Number of iterations with a random policy to prefill the replay buffer
prefill: 1000

# Epsilon-exploration settings
exploration:
  # Minimum value of epsilon-exploration
  min_exploration: 0.1

  # Number of steps for warming up exploration.
  # NOTE(review): the inline "100_000 / 2" presumably refers to
  # `num_iterations` (100_000 in this file) rather than `replay.capacity`,
  # which has the same value. Confirm, and keep the two in sync.
  warmup: 50_000  # 100_000 / 2

# Learning rate.
# Written with an explicit decimal point: YAML 1.1 loaders such as plain
# PyYAML only recognise exponent floats that contain a ".", and would load
# `1e-4` as the string "1e-4" instead of the float 0.0001.
lr: 1.0e-4

# Batch size
batch_size: 128

# Number of iterations
num_iterations: 100_000

# Random seed
seed: 0

# Frequency for logging (presumably counted in iterations; confirm)
log_every: 500

# Frequency for evaluation (presumably counted in iterations; confirm)
evaluation_every: 500

# Evaluation batch size
evaluation_batch_size: 128

# Boolean flag, undocumented in the original config.
# NOTE(review): the name matches the `RewardCorrection` wrapper configured
# under `env_wrapper`; confirm what reads this flag and whether `false` is
# consistent with that wrapper being configured.
reward_correction: false
