# @package _global_
# Selects the environment family for this config.
# NOTE(review): presumably routes to Atari-specific setup — confirm against the consumer.
env_type: atari

# Numeric log verbosity.
# NOTE(review): 20 corresponds to INFO if this is passed to Python's `logging` — confirm.
logging_level: 20

# Experiment bookkeeping options.
experiment:
  # NOTE(review): presumably a cap on CPU threads used by the run — confirm.
  num_threads: 1
  # Project name; the key suggests Weights & Biases — confirm.
  wandb_project_name: "Hyperbolic DDQN - Atari"
  # NOTE(review): presumably toggles experiment tracking; disabled here.
  track: false

# Env settings
# NOTE(review): looks like a Gym/ALE environment id — confirm which registry resolves it.
env_id: BreakoutNoFrameskip-v4
# 10,000,000 (ten million).
# NOTE(review): presumably counted in environment steps — confirm.
total_timesteps: 10000000
# NOTE(review): presumably the number of parallel environment instances — confirm.
num_envs: 1

# Optimizer settings
optimizer:
  algorithm: adam
  # Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
  # PyYAML resolve a bare `1e-4` as the string "1e-4" rather than a float.
  learning_rate: 1.0e-4
  # NOTE(review): presumably Adam's epsilon term — confirm against the consumer.
  adams_eps: 2.5e-5

# Q-overrides
# NOTE(review): the keys below are string/number selectors interpreted by code
# that is not visible here; the comments only describe what the values are.
value_fn:
  curvature: 1.0
  # NOTE(review): presumably the Poincare ball model of hyperbolic space — confirm.
  manifold: poincare
  regularization: ln
  feature_scaling: dim
  forward_pass: HNNpp_MLR
  small_weights: false
  # The two dtype settings differ: float64 here, float32 for the params.
  manifold_dtype: float64
  manifold_params_dtype: float32

  # Loss configuration: 51 bins over the value range [-10.0, 10.0].
  # NOTE(review): `hlgauss` presumably selects a histogram-style (HL-Gauss)
  # loss — confirm against the consumer.
  loss_fn: hlgauss
  loss_num_bins: 51
  loss_min_value: -10.0
  loss_max_value: 10.0

# DDQN settings
# Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `1e-4` as the string "1e-4" rather than a float.
# NOTE(review): a `learning_rate` key also exists under `optimizer`; which of
# the two the training code reads is not visible here — confirm and keep them
# in sync if both are used.
learning_rate: 1.0e-4
embedding_dim: 512
last_layer_tanh: false
batch_size: 32
# 1,000,000 entries.
buffer_size: 1000000
learning_starts: 80000
train_frequency: 4
target_network_frequency: 1000
# NOTE(review): presumably the target-network mixing coefficient, where 1.0
# would be a full (hard) copy — confirm.
tau: 1.0
gamma: 0.99
# NOTE(review): presumably an epsilon-greedy schedule going from `start_e` to
# `end_e` over the first `exploration_fraction` of training — confirm.
start_e: 1.0
end_e: 0.01
exploration_fraction: 0.10

encoder_log_frequency: 100000
compute_embedding_metrics: false
