# Hydra defaults list: the config-group options composed into this config.
# `_self_` is listed last, so under Hydra's composition order the values
# defined in this file are merged after, and override, the group configs.
defaults:
  - agent: q_learning
  - monitor: room
  - _self_

# Environment settings.
# NOTE(review): the meaning of individual keys is not visible from this file;
# confirm units and valid ranges against the environment implementation.
environment:
  id: gym_monitor/Plants-Watering-v1
  grid_size: [10, 10]
  n_plants: 8
  plants_dryness_prob: 0.05
  dry_difference: 0.5
  agent_start_pos: [0, 0]
  max_episode_steps: 100
  n_walls: 5
  window_size: 11
  # NOTE(review): two similarly named switches, both disabled. Confirm that
  # each is actually read by the environment and that they are not duplicates.
  add_new_plants: false
  add_more_plants: false

# Training / evaluation schedule.
# Large values are spelled `<digits>.<digits>e+<exp>` so that YAML 1.1 and
# YAML 1.2 loaders alike resolve them to floats; the dot-less form (`2e7`)
# is read as a *string* by stock YAML 1.1 loaders such as PyYAML.
# NOTE(review): these are count-like values stored as floats. Presumably the
# consumer casts them to int; confirm before switching to integer literals.
experiment:
  training_timesteps: 2.0e+7
  testing_episodes: 5
  testing_frequency: 5.0e+5
  # Default seed; the sweep under `hydra.sweeper.params` overrides it.
  rng_seed: 1
  start_train_timestep: 1.0e+5
  replay_buffer_size: 1.0e+6
  batch_size: 128
  n_itr_episode: 50
  update_target_freq: 5
  save_log: true

# Hydra multirun sweep definition: each key under `params` is a config path
# and its value is the set of values to sweep over.
hydra:
  sweeper:
    params:
      # Seeds 0..9 (the upper bound of `range` is exclusive).
      experiment.rng_seed: range(0, 10)
      # Optional extra sweep axes, currently disabled. Uncomment to enable.
      # agent.critic.strategy: reward_model, q_monitor_joint
      # environment.id: MiniGrid-DoorKey-5x5-v0, Taxi-v3
      # monitor.id: BinaryMonitor
      # monitor.monitor_reset_prob: 0., 0.1, 0.5, 1.
      # monitor.monitor_cost: 0., 0.01, 0.1, 0.5
      # agent.critic.lr: 0.05, 0.1
      # agent.critic.reward_model.lr: 0.01, 0.05
      # agent.critic.q0: 0., 1., 10.
      # agent.actor.eps_decay: 0.01, 0.001, 0.0001
