# from email.policy import default
from UtilsRL.misc.namespace import NameSpace
from offlinerl.utils.env import get_env

# Run-level settings: task selection, RNG seed and output locations.
task = None  # left unset in this file
seed = 181_220_014
tb_log_path = "./tb_redm"  # presumably the TensorBoard log directory -- confirm
exp_name = "redm"

# Root directory; the per-stage output paths in this file are built from it.
default_path = "./out_redm"


# Ablation switches, grouped in a NameSpace subclass.
# NOTE(review): the meaning of each option is defined by the code that reads
# it, which is not visible in this file -- confirm before changing a value.
class ablation(NameSpace):
    sac_embedding_infer = "concat"  # string option; other accepted values are not shown here
    clip_obs = True  # presumably enables observation clipping -- TODO confirm
    probe_mode = "PBT"  # string option; other accepted values are not shown here
    probe_init = True
    sl = False  # abbreviation is not explained in this file -- TODO document


# Debugging flags, grouped in a NameSpace subclass.
class debug(NameSpace):
    de = False  # NOTE(review): name is not explained here; presumably a debug toggle -- confirm


# E-mail settings; every field defaults to None in this file.
# NOTE(review): presumably used to send run notifications -- confirm against
# the consumer. Do not commit real credentials here.
class email(NameSpace):
    to = None
    account = None
    password = None


# Per-stage output directories, all rooted at default_path.
dynamics_path = f"{default_path}/dynamics"
bc_policy_path = f"{default_path}/bc_policy"
mainloop_path = f"{default_path}/mainloop"
# probe_policy_path = default_path + "/probe_policy"
# candidate_model_path = default_path + "/candidate_model"
# meta_policy_path = default_path + "/meta_policy"

mainloop_save_interval = 20

cc = 0.0
start_epoch = 0
total_epoch = 100
test_mode = False

# ---- train ----
soft_expanding = 0.05
horizon = 10
min_horizon = 10
max_horizon = 100
env_pool_size = None  # filled in at run time
# model_pool_size = 50000  # TODO: check the ratio against env_pool_size

real_data_ratio = 0.5
eval_type = "grav"


####### train dynamics
# Settings for the dynamics-model training stage, grouped in a NameSpace subclass.
# NOTE(review): per-field semantics are defined by the consumer, which is not
# visible in this file.
class Dynamics(NameSpace):
    init_num = 20
    select_num = 14  # presumably how many of the init_num models are kept -- TODO confirm
    hidden_layer_size = 200
    hidden_layer_num = 3
    batch_size = 256
    lr = 1e-3
    l2_loss_coef = 0.000075  # i.e. 7.5e-5
    # l2_loss_coef = 0.0
    normalizer = "static"  # string option; other accepted values are not shown here
    max_epoch = 150
    min_epoch = 20
    eval_with_var_loss = False
    train_with_clip_loss = True


###### SAC Agent
# Module-level settings for the SAC agent section.
# NOTE(review): the descriptions below are inferred from the names only; the
# code that consumes these values is not visible in this file.
maple = False  # meaning not shown here -- confirm with the consumer
q_target_clip = True
rnn_hidden_dim = 128
# rnn_hidden_dim = 32
emb_hidden_dim = 64
rnn_layer_num = 1
decoder_hidden_dims = (16,)  # one-element tuple: a single hidden width
head_hidden_dims = (256, 256)
actor_lr = 1e-4
critic_lr = 3e-4
discount = 0.99
soft_target_tau = 5e-3
learnable_alpha = True
eval_runs = 4

# Both tuples currently hold the same widths.
policy_hidden_dims = (256, 256)
value_hidden_dims = (256, 256)


###### BC stage
# Settings for the BC stage, grouped in a NameSpace subclass.
# NOTE(review): "BC" presumably stands for behavior cloning -- confirm.
class BC(NameSpace):
    train_epoch = 3000
    train_update = 100  # presumably updates per epoch -- TODO confirm
    batch_size = 256
    reset_interval = 4  # what is reset, and in which unit, is not shown here
    bc_loss_coeff = 1.


###### Candidate
# Settings for the candidate stage, grouped in a NameSpace subclass.
# NOTE(review): per-field semantics are defined by the consumer, which is not
# visible in this file.
class Candidate(NameSpace):
    train_epoch = 1
    train_update = 500
    ratio_aux = 0.1
    num_policy = 1
    train_batch_size = 64
    candidate_batch_size = 64
    candidate_horizon = 10
    seed = 20  # separate from the module-level `seed`
    search_horizon = 5

    pg_coef = 0.01

    lr = 3e-4
    l2_loss_coef = 0.000075  # i.e. 7.5e-5; same value as Dynamics.l2_loss_coef
    # l2_loss_coef = 0.0
    ppo_epoch = 3
    ppo_batch_size = 64
    cons_batch_size = 256


# Settings for the meta stage, grouped in a NameSpace subclass.
# NOTE(review): per-field semantics are defined by the consumer, which is not
# visible in this file.
class Meta(NameSpace):
    model_pool_size = 250000
    rollout_batch_size = 50000
    model_sample_size = 14  # same value as Dynamics.select_num; whether they must match is not shown here

    lam = -0.25  # negative coefficient; its role is not shown here -- TODO document

    train_batch_size = 256
    init_epoch = 0
    train_epoch = 20
    train_update = 1000

    reset_interval = 4
    max_model = 20  # same value as Dynamics.init_num; whether they must match is not shown here
    # save_interval = 20

# Evaluation settings, grouped in a NameSpace subclass.
class Eval(NameSpace):
    num_traj = 5  # presumably the number of trajectories per evaluation -- TODO confirm
