# @package _global_
# Environment id, wrapper class, and the arguments handed to that wrapper.
env_name: Walker2d-v4
wrapper_class: WalkerWrapper
# NOTE(review): a bare `None` is not YAML null; it loads as the string "None".
# The values are kept unchanged here. Switch them to `null` if the wrapper
# expects a real null -- confirm against the code that reads wrapper_kwargs.
wrapper_kwargs:
  reward_path: None
  env_name: Walker2d-v4
  scaler_path: None
  configs: None

# SAC agent settings
agent_name: sb_sac
n_envs: 4
batch_size: 256
# n_cpu mirrors n_envs.
n_cpu: ${n_envs}
policy_type: 'MlpPolicy'
# vf_size follows pi_size, which is set further down from init_pi_size.
# pi_size: 128
vf_size: ${pi_size}
policy_kwargs: null
verbose: 0
# NOTE(review): exp_name is not defined in this file; presumably it comes from
# another config or a command-line override -- confirm.
tensorboard_log: 'checkpoints/${exp_name}/logs/'
use_sde: false
sde_sample_freq: 20
learning_starts: 10000
ent_coef: 'auto'
tau: 0.01
# Base values that total_timesteps, learning_rate, gamma and pi_size
# interpolate from.
init_total_timesteps: 1e6
init_learning_rate: 0.0003
init_gamma: 0.99
init_pi_size: 256


# Each of these is an interpolation of its init_* counterpart.
learning_rate: ${init_learning_rate}
total_timesteps: ${init_total_timesteps}
gamma: ${init_gamma}
pi_size: ${init_pi_size}
# Data locations and basis selection.
path_to_expert: 'data/Walker2d-v4/ppo_expert'
path_to_data: 'data/Walker2d-v4/'
path_to_basis: 'cubic'

# Training-loop settings
train_freq: 1
gradient_steps: 1
use_adam: true

# Algorithm settings
algo_name: maxent
d_states: 3
# Empty list (not null).
feats_selected: []
feats_method: 'random'
lr: 0.03
gamma_feat: 0.99
epochs: 50
alpha_decay: 0.99
len_traj: 1000
n_trajs: 300
samples_per_state: 1
# NOTE(review): a bare `None` loads as the string "None", not YAML null.
# Kept unchanged; use `null` if a real null is intended -- confirm.
scaler_params: None

# Sweep parameters (disabled): uncomment the hydra block below to enable.
# hydra:
#   sweeper:
#     params:
#       lr: choice(0.2, 0.4, 0.6, 0.8, 1.0)
#       gamma_feat: range(0.92, 1, 0.02)
#       n_trajs: choice(100, 200, 300)
#       len_traj: choice(100, 150, 200)
#       epochs: choice(30, 40, 50)
#       total_timesteps: choice(1e6, 1.5e6, 2e6)
#       learning_rate:  choice(1e-5, 5e-5, 8e-5)
#       gamma: range(0.94, 1, 0.02)
#       pi_size:  choice(128, 256, 512)