# @package _global_
# Hydra defaults list: composes this experiment from three config groups and
# then swaps in the Optuna sweeper and Ray launcher. Entry order is significant.
defaults:
  # Group paths are written relative to this file's location in the config
  # tree (two levels up) — NOTE(review): confirm these directories exist.
  - agent_params: ../../agent_params/erqt
  - env_params: ../../env_params/maze2d_umaze
  - run_params: ../../run_params/base
  # _self_ comes after the groups above, so values set in this file are merged
  # on top of them.
  - _self_
  # Replace Hydra's default sweeper/launcher; both are configured under the
  # `hydra:` key in this file.
  - override /hydra/sweeper: optuna
  - override /hydra/launcher: ray

# Hydra runtime settings for the multirun sweep: the Ray launcher executes the
# trials and the Optuna sweeper proposes them.
hydra:
  launcher:
    ray:
      # Settings under `init` configure the Ray runtime itself.
      init:
        num_gpus: 4
        local_mode: false
        # Machine-specific scratch directory for Ray's temporary files.
        _temp_dir: '/home/data_hdd/ray_tmp'
      # Settings under `remote` apply to each launched job.
      remote:
        # One GPU is reserved per job.
        num_gpus: 1
        # NOTE(review): presumably recycles the worker after each job so GPU
        # memory is released between trials — confirm against Ray's docs.
        max_calls: 1
  sweeper:
    sampler:
      # Fixed sampler seed.
      seed: 123
    direction: maximize
    study_name: maze2d_umaze_optimization
    storage: null  # in-memory study; nothing is persisted
    n_trials: 200
    # Number of parallel jobs.
    # NOTE(review): init.num_gpus is 4 and every job requests 1 GPU, so it
    # looks like only 4 of these 8 jobs can hold a GPU at once — confirm.
    n_jobs: 8
    # Search space, written as Hydra sweep expressions. Entry order is kept
    # as-is on purpose.
    params:
      run_params.learning_rate: 'choice(0.0001, 0.0003, 0.0005, 0.001)'
      run_params.lr_decay: 'choice(true, false)'
      run_params.alpha: 'choice(1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1, 0.5, 1.0, 10.0)'
      # NOTE(review): `seed` is searched like any other parameter here, so the
      # study may end up favouring a particular seed — confirm this is intended.
      seed: 'choice(0, 42, 123)'
  # Output layout: one timestamped directory per sweep, one numbered
  # sub-directory per job.
  sweep:
    dir: 'multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}'
    subdir: '${hydra.job.num}'

# --- Experiment identity ---
exp_name: hpo
# Default seed for a single run; hydra.sweeper.params also sweeps `seed`, so
# this value is replaced per trial during a multirun.
seed: 123

# --- Compute ---
device: cuda

# --- Outputs and tracking ---
# Relative path — NOTE(review): presumably resolved against the job's working
# directory; confirm where artifacts actually land under multirun.
save_path: ./save/
# NOTE(review): presumably toggles Weights & Biases logging (see wandb_params).
log_to_wandb: true

# Settings for the Weights & Biases run.
# NOTE(review): presumably forwarded as keyword arguments to wandb.init —
# confirm in the training entry point.
wandb_params:
  project: erqt
  # Booleans use canonical lowercase `true`, matching the other flags in this
  # file (local_mode, log_to_wandb); the parsed value is unchanged.
  monitor_gym: true
  save_code: true