# @package _global_
# Environment settings
env_name: Hopper-v4

# Wrapper class name and the keyword arguments configured for it.
# NOTE(review): a plain `None` is not YAML null; YAML loaders read it as the
# string "None". The values are kept exactly as they were. Confirm that the
# consuming code expects that string before switching any of them to `null`.
wrapper_class: HopperWrapper
wrapper_kwargs:
  reward_path: None
  env_name: Hopper-v4  # same literal as the top-level env_name
  scaler_path: None
  configs: None

# NOTE(review): this is also the string "None" (see the note above), not null.
scaler_params: None

# Agent policy parameters
agent_name: sb_sac

n_envs: 8
batch_size: 2048
# Kept equal to n_envs through interpolation.
n_cpu: ${n_envs}
policy_type: 'MlpPolicy'
# pi_size is not set in this stanza; it is defined further down in this file
# (interpolated from init_pi_size), and vf_size follows it.
vf_size: ${pi_size}
# Explicit null instead of a bare empty value; the parsed value is unchanged.
policy_kwargs: null
verbose: 0
# exp_name is not defined in the visible part of this file; it has to be
# supplied by another config or an override.
tensorboard_log: 'checkpoints/${exp_name}/logs/'
use_sde: false
sde_sample_freq: -1
learning_starts: 10000
ent_coef: auto
tau: 0.005

# Initial values. learning_rate, total_timesteps, gamma and pi_size further
# down in this file are interpolated from these keys.
# The floats carry a decimal point and a signed exponent so that plain
# YAML 1.1 loaders also read them as numbers rather than strings.
init_total_timesteps: 1.0e+6
init_learning_rate: 3.0e-4
init_gamma: 0.995
init_pi_size: 256


# ██╗██████╗ ██╗     
# ██║██╔══██╗██║     
# ██║██████╔╝██║     
# ██║██╔══██╗██║     
# ██║██║  ██║███████╗
# ╚═╝╚═╝  ╚═╝╚══════╝
                   
# Effective hyperparameters. Each one is interpolated from the matching
# init_* key defined earlier in this file, so it resolves to that value unless
# it is overridden. The same four keys are listed in the commented-out
# sweeper block at the end of this file.
learning_rate: ${init_learning_rate}
total_timesteps: ${init_total_timesteps}
gamma: ${init_gamma}
# vf_size (agent section) is interpolated from this key.
pi_size: ${init_pi_size}

# Locations used when loading or training the expert.
# NOTE(review): these are relative paths, presumably resolved against the
# working directory of the run. Confirm against the loading code.
path_to_expert: 'data/Hopper-v4/ppo_expert'
path_to_data: 'data/Hopper-v4/'
# NOTE(review): 'prop_2' looks like a name rather than a filesystem path.
# Confirm how the consumer interprets it.
path_to_basis: 'prop_2'

# Algorithm parameters
algo_name: maxent

# lr, gamma_feat and epochs below also appear in the commented-out sweeper
# block at the end of this file.
d_states: 3
feats_selected: []  # explicit empty list
feats_method: 'random'
use_adam: true
lr: 0.2
gamma_feat: 0.99
epochs: 100
alpha_decay: 0.97

# Trajectory settings. len_traj and n_trajs are also listed in the
# commented-out sweeper block.
len_traj: 1000
n_trajs: 100
samples_per_state: 1

# # sweep parameters
# hydra:
#   sweeper:
#     params:
#       lr: choice(0.2, 0.4, 0.6, 0.8, 1.0)
#       gamma_feat: range(0.92, 1, 0.02)
#       n_trajs: choice(100, 200, 300)
#       len_traj: choice(100, 150, 200)
#       epochs: choice(30, 40, 50)

#       total_timesteps: choice(1e6, 1.5e6, 2e6)
#       learning_rate:  choice(1e-5, 5e-5, 8e-5)
#       gamma: range(0.94, 1, 0.02)
#       pi_size:  choice(128, 256, 512)
