# PPO config for MiniGrid
# Hydra defaults list: each entry selects one option from a config group.
# The leading `/` makes the group path absolute (resolved from the config
# root rather than relative to this file's own package).
defaults:
  - /experiment: default
  - /policy: default
  - /value_fn: default
  - /optimizer: default
  - /envs/ppo: minigrid
  # `_self_` is listed last, so the values set in this file override the
  # defaults composed from the entries above.
  - _self_

# Log verbosity, given as a numeric level.
# NOTE(review): 20 equals Python's logging.INFO; confirm the consumer hands
# this value to the `logging` module.
logging_level: 20

# Selected environment type
env_type: minigrid

# Algorithm specific
# Overrides for the `experiment` group selected in the defaults list.
experiment:
  # NOTE(review): presumably the Weights & Biases project that runs are
  # logged to when `track` is true; confirm against the training script.
  wandb_project_name: "Hyperbolic RL"
  track: false
  exp_name: hyperbolic_ppo_gridworld

# Overrides for the `policy` group selected in the defaults list.
# NOTE(review): every value below is identical to the corresponding key in
# the `value_fn` section; keep the two in sync if policy and value function
# are meant to share the same manifold settings.
policy:
  curvature: 1.0
  manifold: hyperboloid
  regularization: rms
  feature_scaling: dim
  forward_pass: HNNpp_MLR
  small_weights: false
  manifold_dtype: float32
  manifold_params_dtype: float32

# Critic overrides
# The first eight keys mirror the `policy` section value for value.
value_fn:
  curvature: 1.0
  manifold: hyperboloid
  regularization: rms
  feature_scaling: dim
  forward_pass: HNNpp_MLR
  small_weights: false
  manifold_dtype: float32
  manifold_params_dtype: float32
  # Value loss parameters
  # NOTE(review): the bin count and min/max range look like settings for a
  # binned (categorical) value loss; with `loss_fn: mse` they are presumably
  # ignored. Confirm against the loss implementation.
  loss_fn: mse
  loss_num_bins: 51
  loss_min_value: -10.0
  loss_max_value: 10.0

# Optimizer overrides, applied on top of the `/optimizer: default` entry from
# the defaults list.
optimizer:
  algorithm: adam
  learning_rate: 0.0005
  # Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
  # plain PyYAML resolve `1e-05` as the string "1e-05", whereas `1.0e-05` is
  # read as a float under both YAML 1.1 and YAML 1.2 rules.
  adam_eps: 1.0e-05  # Only for Adam
  encoder_weight_decay: 0.0

# Env
# MiniGrid-Empty-16x16-v0 (8x8 also possible)
env_id: MiniGrid-Empty-16x16-v0
# Training budget.
# NOTE(review): presumably counted in environment steps summed over all
# parallel envs; confirm against the training loop.
total_timesteps: 100000
dense_reward: true
disable_orientation: false

# PPO
# NOTE(review): the trailing `# <number>` comments on some lines appear to
# record alternative values that were tried; they have no effect on the
# loaded config. Confirm their meaning with the author.
num_envs: 32
num_steps: 128
num_minibatches: 16
update_epochs: 10 # 12
gamma: 0.99
gae_lambda: 0.95
norm_adv: true
ent_coef: 0.001
max_grad_norm: 0.5 # 1.5
# NOTE(review): null presumably disables any KL-based early stopping; verify
# how the training loop treats a missing target.
target_kl: null
feat_reg_coef: 0.0
clip_coef: 0.2 # 0.4
vf_coef: 0.4 # 0.15
embedding_dim: 2
shared_encoder: true
last_layer_tanh: true
compute_embedding_metrics: true

# model saving
save_agent: true
# Same value as `total_timesteps` above.
# NOTE(review): if the interval is measured in timesteps, this yields a
# single save at the end of training; confirm the unit.
save_interval: 100000

# runtime-computed fields (kept for completeness; not used by Hydra directly)
# `???` is OmegaConf's mandatory-missing marker: reading any of these keys
# before the program has assigned a value raises an error.
# NOTE(review): presumably derived from num_envs, num_steps, num_minibatches
# and total_timesteps by the training script; verify the formulas there.
batch_size: ???
minibatch_size: ???
num_iterations: ???