# @package _global_
# PPO config for Atari

# Selected environment type.
# NOTE(review): presumably tells the consuming code which environment family
# the rest of this config applies to — confirm against the code that reads it.
env_type: atari

# Experiment tracking overrides
experiment:
  # Project name for run logging (Weights & Biases, going by the key name).
  wandb_project_name: 'Hyperbolic RL - Atari'
  # Tracking is switched off in this config.
  # NOTE(review): presumably gates W&B logging — confirm in the training script.
  track: false

# Actor overrides
policy:
  # NOTE(review): presumably the curvature of the manifold chosen below; the
  # sign/scale convention is not visible in this file — confirm.
  curvature: 1.0
  # NOTE(review): "poincare" presumably selects the Poincare ball model of
  # hyperbolic space — confirm against the manifold implementation.
  manifold: poincare
  # NOTE(review): "sn" presumably stands for spectral normalization — confirm.
  regularization: sn
  # NOTE(review): "dim" presumably scales features by their dimensionality —
  # confirm.
  feature_scaling: dim
  # NOTE(review): presumably the name of a forward-pass variant looked up by
  # the model code — confirm.
  forward_pass: HRL_forward
  small_weights: false
  # Two separate dtype settings, both float32 here.
  # NOTE(review): presumably one for manifold operations and one for manifold
  # parameters, going by the key names — confirm.
  manifold_dtype: float32
  manifold_params_dtype: float32

# Critic overrides
value_fn:
  # The first eight keys carry the same values as the `policy` section.
  # NOTE(review): presumably the curvature of the manifold chosen below; the
  # sign/scale convention is not visible in this file — confirm.
  curvature: 1.0
  # NOTE(review): "poincare" presumably selects the Poincare ball model of
  # hyperbolic space — confirm against the manifold implementation.
  manifold: poincare
  # NOTE(review): "sn" presumably stands for spectral normalization — confirm.
  regularization: sn
  # NOTE(review): "dim" presumably scales features by their dimensionality —
  # confirm.
  feature_scaling: dim
  # NOTE(review): presumably the name of a forward-pass variant looked up by
  # the model code — confirm.
  forward_pass: HRL_forward
  small_weights: false
  # Two separate dtype settings, both float32 here.
  manifold_dtype: float32
  manifold_params_dtype: float32

  # Value-loss selection.
  loss_fn: mse
  # NOTE(review): the three keys below look like settings for a binned value
  # loss (bin count and value range); with `loss_fn: mse` they are presumably
  # unused — confirm.
  loss_num_bins: 51
  loss_min_value: -10.0
  loss_max_value: 10.0

# Optimizer settings
optimizer:
  algorithm: adam
  learning_rate: 0.00025
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. stock
  # PyYAML) resolve a bare `1e-05` as a string, not a float.
  adam_eps: 1.0e-05  # Only for Adam

# Env
# Environment identifier.
# NOTE(review): presumably passed to the environment factory — confirm.
env_id: BreakoutNoFrameskip-v4
# Training budget: 10,000,000 (ten million) timesteps.
# NOTE(review): whether this counts agent steps or emulator frames is not
# visible here — confirm.
total_timesteps: 10000000

# PPO
# NOTE(review): the descriptions below are inferred from the key names and the
# usual PPO vocabulary — confirm against the training loop.
# Presumably the number of parallel environments.
num_envs: 8
# Presumably the clipping coefficient of the PPO surrogate objective.
clip_coef: 0.1
# Presumably the weight of the entropy term in the total loss.
ent_coef: 0.01
# Presumably the weight of the value-function loss in the total loss.
vf_coef: 0.5
# Presumably the maximum norm used for gradient clipping.
max_grad_norm: 0.5
# Explicitly unset (null).
# NOTE(review): presumably disables any KL-based threshold — confirm.
target_kl: null
# NOTE(review): presumably the width of the shared feature embedding rather
# than a PPO hyperparameter — confirm.
embedding_dim: 512
