# Hydra defaults list. `_self_` is listed first, so the entries that follow
# are composed after (and take precedence over) this file's own values.
defaults:
  - _self_
  # Select the `disabled` option for both Hydra's internal logging and the
  # job logging config groups.
  - override hydra/hydra_logging: disabled
  - override hydra/job_logging: disabled

# Environment name and training length.
env_name: ant-dir
num_train_steps: 1000000
# Replay buffer capacity is tied to the number of training steps.
replay_buffer_capacity: ${num_train_steps}

dataset_size: 200000

num_seed_steps: 50000
eval_frequency: 50000
# Number of episodes for deterministic evaluation, where the action is the
# mean of the distribution given by the actor.
num_eval_episodes: 10
# Number of episodes for stochastic evaluation, where the action is sampled
# from the distribution given by the actor.
num_eval_sample_episodes: 50
# Starting number for trajectories generated by run_evaluate() in
# pytorch_sac/train.py.
eval_start_num: 0
# Quoted so the embedded colon can never be misread by a YAML parser.
device: 'cuda:0'

seed: 0
goal_idx: 0
agent_name: 'DroQ'
batch_size: 1024

# Alternative for the `device` key above: device: cpu
# Logger settings. `group`, `run_name` and `log_dir` are derived from
# `env_name` / `goal_idx`; `run_name` also embeds a timestamp through the
# `now` resolver.
use_wandb: true
project_name: OMRL-SAC-Data
group: ${env_name}
run_name: '${env_name}-${goal_idx}-${now:%Y-%m-%d_%H-%M-%S}'
log_dir: './data/${env_name}/goal_idx${goal_idx}'
# Video recorder and agent saving flags.
save_video: false
save_agent: true
# Environment parameters.
# NOTE(review): presumably passed to the environment selected by `env_name`;
# confirm against the training code.
env_params:
  n_tasks: 40
  max_episode_steps: 200


# Agent configuration. `obs_dim` and `action_dim` use the `???` mandatory
# marker and must be filled in before they are read.
agent:
  obs_dim: ???  # to be specified later
  action_dim: ???  # to be specified later
  device: ${device}

  hidden_depth: 2
  hidden_dim: 512
  log_std_min: -5
  log_std_max: 2
  dropout: 0.1
  discount: 0.99

  # Learning rates are written with an explicit decimal point so that every
  # YAML loader reads them as floats.
  alpha_lr: 1.0e-4
  actor_lr: 3.0e-4
  critic_lr: 1.0e-4
  tau: 0.005

  # entropy term
  alpha: 0.2  # ignore if autotune
  tune_alpha: true
  # NOTE(review): an interpolation embedded in a longer string resolves to a
  # string in OmegaConf, so this may yield e.g. "-8" rather than a number.
  # Confirm that the consumer converts it.
  target_entropy: -${agent.action_dim}
  init_alpha: 0.1
  # accelerate
  compile: false
  cuda_graph: false

# Hydra configuration: `output_subdir: null` turns off the `.hydra` output
# subdirectory, and `run.dir: .` makes the run directory the current
# directory.
hydra:
  output_subdir: null
  run:
    dir: .
