# Optuna hyperparameter search spaces and per-environment hyperparameter presets.

#https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml
# Per-environment integer search ranges (low, high) for the rollout settings.
# These ranges used to live in a permanently disabled ``if False:`` branch;
# they are only consulted when the caller explicitly asks for them.
_ENV_SPECIFIC_ROLLOUT_RANGES = {
    'CartPole-v1': {"n_steps": (16, 64), "n_envs": (4, 16)},
    'Pendulum-v1': {"n_steps": (256, 512), "n_envs": (2, 8)},
    'BipedalWalker-v3': {"n_steps": (256, 512), "n_envs": (16, 32)},
    'LunarLander-v2': {"n_steps": (256, 512), "n_envs": (8, 16)},
}


def modify_parameters_by_environment(trial, params, env_id, *, use_env_specific_ranges=False):
    """Add the rollout hyperparameters ``n_steps`` and ``n_envs`` to ``params``.

    Args:
        trial: Object exposing ``suggest_categorical(name, choices)`` and
            ``suggest_int(name, low, high)`` (an Optuna trial in practice).
        params: Dict of already suggested hyperparameters; it is updated in
            place.
        env_id: Environment identifier. It is only consulted when
            ``use_env_specific_ranges`` is true.
        use_env_specific_ranges: When true and ``env_id`` has an entry in
            ``_ENV_SPECIFIC_ROLLOUT_RANGES``, both values are drawn as integers
            from that entry's ranges. Otherwise (the default, and also for
            unknown environments) the generic search space is used.

    Returns:
        The same ``params`` dict, now containing ``"n_steps"`` and
        ``"n_envs"``.
    """
    env_ranges = None
    if use_env_specific_ranges:
        env_ranges = _ENV_SPECIFIC_ROLLOUT_RANGES.get(env_id)

    if env_ranges is not None:
        # Keep the n_steps -> n_envs order so suggestions are requested in the
        # same sequence as in the generic path.
        for name in ("n_steps", "n_envs"):
            low, high = env_ranges[name]
            params[name] = trial.suggest_int(name, low, high)
        return params

    # Generic search space shared by every environment.
    params["n_steps"] = trial.suggest_categorical("n_steps", [4, 8, 32])
    params["n_envs"] = trial.suggest_int("n_envs", 8, 32)
    return params

    
def suggest_config_sympol(trial, env_id=''):
    """Sample one hyperparameter configuration for the 'sympol' search space.

    Args:
        trial: Trial-like object providing ``suggest_float`` and
            ``suggest_categorical`` (an Optuna trial in practice).
        env_id: Environment identifier, forwarded unchanged to
            ``modify_parameters_by_environment``.

    Returns:
        Dict mapping hyperparameter names to the suggested values, as returned
        by ``modify_parameters_by_environment``.
    """
    # Upper bounds of the log-scaled learning-rate ranges; every range starts
    # at 0.0001. Listed in the order in which they are suggested.
    lr_upper_bounds = (
        ("learning_rate_actor_weights", 0.1),
        ("learning_rate_actor_split_values", 0.05),
        ("learning_rate_actor_split_idx_array", 0.1),
        ("learning_rate_actor_leaf_array", 0.05),
        ("learning_rate_actor_log_std", 0.1),
        ("learning_rate_critic", 0.01),
    )

    # Categorical choices, again in suggestion order. A single-element list
    # leaves exactly one possible value for that hyperparameter.
    categorical_choices = (
        ("SWA", [True]),
        ("adamW", [True]),
        ("reduce_lr", [True, False]),
        ("dropout", [0.0]),  # original note: "IMPLEMENT"
        ("depth", [7]),
        ("max_grad_norm", [1000]),
        ("norm_adv", [True, False]),
        ("ent_coef", [0.0, 0.1, 0.2, 0.5]),
        ("vf_coef", [0.25, 0.5, 0.75]),
        ("gamma", [0.9, 0.95, 0.99, 0.999]),
        ("gae_lambda", [0.8, 0.9, 0.95, 0.99]),
        ("dynamic_buffer", [True, False]),
    )

    config = {}
    for name, upper in lr_upper_bounds:
        config[name] = trial.suggest_float(name, 0.0001, upper, log=True)
    for name, options in categorical_choices:
        config[name] = trial.suggest_categorical(name, options)

    # n_steps / n_envs are added by the shared helper.
    return modify_parameters_by_environment(trial, config, env_id)

def suggest_config_mlp(trial, env_id=''):
    """Sample one hyperparameter configuration for the 'mlp' search space.

    Args:
        trial: Trial-like object providing ``suggest_int``, ``suggest_float``
            and ``suggest_categorical`` (an Optuna trial in practice).
        env_id: Environment identifier, forwarded unchanged to
            ``modify_parameters_by_environment``.

    Returns:
        Dict mapping hyperparameter names to the suggested values, as returned
        by ``modify_parameters_by_environment``.
    """
    choose = trial.suggest_categorical
    config = {}

    # Network size.
    config["num_layers"] = trial.suggest_int("num_layers", 1, 3)
    config["neurons_per_layer"] = trial.suggest_int("neurons_per_layer", 16, 256)

    # Optimizer settings; both learning rates share one log-scaled range.
    for lr_name in ("learning_rate_actor", "learning_rate_critic"):
        config[lr_name] = trial.suggest_float(lr_name, 0.0001, 0.01, log=True)
    config["reduce_lr"] = choose("reduce_lr", [False])
    config["adamW"] = choose("adamW", [False])

    # Remaining training settings, all drawn from fixed candidate lists.
    config["max_grad_norm"] = choose("max_grad_norm", [0.1, 0.5, 1.0, 1000])
    config["norm_adv"] = choose("norm_adv", [True, False])
    config["ent_coef"] = choose("ent_coef", [0.0, 0.1, 0.2])
    config["vf_coef"] = choose("vf_coef", [0.25, 0.5, 0.75])
    config["gamma"] = choose("gamma", [0.9, 0.95, 0.99, 0.999])
    config["gae_lambda"] = choose("gae_lambda", [0.9, 0.95, 0.99])

    # n_steps / n_envs are added by the shared helper.
    return modify_parameters_by_environment(trial, config, env_id)

def suggest_config_sdt(trial, env_id=''):
    """Sample one hyperparameter configuration for the 'sdt' search space.

    Args:
        trial: Trial-like object providing ``suggest_int``, ``suggest_float``
            and ``suggest_categorical`` (an Optuna trial in practice).
        env_id: Environment identifier, forwarded unchanged to
            ``modify_parameters_by_environment``.

    Returns:
        Dict mapping hyperparameter names to the suggested values, as returned
        by ``modify_parameters_by_environment``.
    """
    def log_lr(name):
        # Learning rates share one log-scaled range.
        return trial.suggest_float(name, 0.0001, 0.01, log=True)

    def one_of(name, *options):
        return trial.suggest_categorical(name, list(options))

    config = {"depth": trial.suggest_int("depth", 4, 8)}
    config["learning_rate_actor"] = log_lr("learning_rate_actor")
    config["learning_rate_critic"] = log_lr("learning_rate_critic")
    config["reduce_lr"] = one_of("reduce_lr", False)
    config["adamW"] = one_of("adamW", False)
    # NOTE(review): 1 is listed four times, presumably to bias sampling
    # towards a temperature of 1 -- confirm this is intentional.
    config["temperature"] = one_of("temperature", 0.01, 0.05, 0.1, 0.5, 1, 1, 1, 1)

    config["critic"] = one_of("critic", "mlp", "sdt")

    config["minibatch_size"] = one_of("minibatch_size", 64, 128, 256, 512)

    config["n_update_epochs"] = trial.suggest_int("n_update_epochs", 1, 10)
    config["max_grad_norm"] = one_of("max_grad_norm", 0.1, 0.5, 1.0, 1000)
    config["norm_adv"] = one_of("norm_adv", True, False)
    config["ent_coef"] = one_of("ent_coef", 0.0, 0.1, 0.2)
    config["vf_coef"] = one_of("vf_coef", 0.25, 0.5, 0.75)

    config["gamma"] = one_of("gamma", 0.9, 0.95, 0.99, 0.999)
    config["gae_lambda"] = one_of("gae_lambda", 0.9, 0.95, 0.99)

    # n_steps / n_envs are added by the shared helper.
    return modify_parameters_by_environment(trial, config, env_id)


    
def suggest_config_dsdt(trial, env_id=''):
    """Sample one hyperparameter configuration for the 'dsdt' search space.

    Args:
        trial: Trial-like object providing ``suggest_int``, ``suggest_float``
            and ``suggest_categorical`` (an Optuna trial in practice).
        env_id: Environment identifier, forwarded unchanged to
            ``modify_parameters_by_environment``.

    Returns:
        Dict mapping hyperparameter names to the suggested values, as returned
        by ``modify_parameters_by_environment``.
    """
    as_int = trial.suggest_int
    as_float = trial.suggest_float
    as_choice = trial.suggest_categorical
    log_scale = {"log": True}

    # One row per hyperparameter, in suggestion order:
    # (name, suggest method, positional arguments after the name, keyword arguments).
    search_space = (
        ("depth", as_int, (4, 8), {}),
        ("learning_rate_actor", as_float, (0.0001, 0.01), log_scale),
        ("learning_rate_critic", as_float, (0.0001, 0.01), log_scale),
        ("reduce_lr", as_choice, ([False],), {}),
        ("adamW", as_choice, ([False],), {}),
        # NOTE(review): 1 appears four times, presumably to bias sampling
        # towards a temperature of 1 -- confirm this is intentional.
        ("temperature", as_choice, ([0.01, 0.05, 0.1, 0.5, 1, 1, 1, 1],), {}),
        ("critic", as_choice, (["mlp", "sdt"],), {}),
        ("minibatch_size", as_choice, ([64, 128, 256, 512],), {}),
        ("n_update_epochs", as_int, (1, 10), {}),
        ("max_grad_norm", as_choice, ([0.1, 0.5, 1.0, 1000],), {}),
        ("norm_adv", as_choice, ([True, False],), {}),
        ("ent_coef", as_choice, ([0.0, 0.1, 0.2],), {}),
        ("vf_coef", as_choice, ([0.25, 0.5, 0.75],), {}),
        ("gamma", as_choice, ([0.9, 0.95, 0.99, 0.999],), {}),
        ("gae_lambda", as_choice, ([0.9, 0.95, 0.99],), {}),
    )

    config = {}
    for name, suggest, args, kwargs in search_space:
        config[name] = suggest(name, *args, **kwargs)

    # n_steps / n_envs are added by the shared helper.
    return modify_parameters_by_environment(trial, config, env_id)

def suggest_config_stateActionDT(trial, env_id=''):
    """Sample one hyperparameter configuration for the 'stateActionDT' search space.

    Args:
        trial: Trial-like object providing ``suggest_int``, ``suggest_float``
            and ``suggest_categorical`` (an Optuna trial in practice).
        env_id: Environment identifier, forwarded unchanged to
            ``modify_parameters_by_environment``.

    Returns:
        Dict mapping hyperparameter names to the suggested values, as returned
        by ``modify_parameters_by_environment``.
    """
    # The three groups are built one after another so the suggestions are
    # requested in the same order in which the keys appear in the result.
    architecture = {
        "num_layers": trial.suggest_int("num_layers", 1, 3),
        "neurons_per_layer": trial.suggest_int("neurons_per_layer", 16, 256),
    }

    optimizer = {
        "learning_rate_actor": trial.suggest_float("learning_rate_actor", 0.0001, 0.01, log=True),
        "learning_rate_critic": trial.suggest_float("learning_rate_critic", 0.0001, 0.01, log=True),
        "reduce_lr": trial.suggest_categorical("reduce_lr", [False]),
        "adamW": trial.suggest_categorical("adamW", [False]),
    }

    training = {
        "minibatch_size": trial.suggest_categorical("minibatch_size", [64, 128, 256, 512]),
        "n_update_epochs": trial.suggest_int("n_update_epochs", 1, 10),
        "max_grad_norm": trial.suggest_categorical("max_grad_norm", [0.1, 0.5, 1.0, 1000]),
        "norm_adv": trial.suggest_categorical("norm_adv", [True, False]),
        "ent_coef": trial.suggest_categorical("ent_coef", [0.0, 0.1, 0.2]),
        "vf_coef": trial.suggest_categorical("vf_coef", [0.25, 0.5, 0.75]),
        "gamma": trial.suggest_categorical("gamma", [0.9, 0.95, 0.99, 0.999]),
        "gae_lambda": trial.suggest_categorical("gae_lambda", [0.9, 0.95, 0.99]),
    }

    config = {**architecture, **optimizer, **training}

    # n_steps / n_envs are added by the shared helper.
    return modify_parameters_by_environment(trial, config, env_id)
    

# Fixed hyperparameter sets stored under the name ``cartpole``, one entry per
# model type ('mlp', 'sdt', 'sympol').
cartpole = {
    'mlp': {
        "adamW": False,
        "ent_coef": 0.1,
        "gae_lambda": 0.99,
        "gamma": 0.99,
        # learning rates
        "learning_rate_actor": 0.00020186874551533516,
        "learning_rate_critic": 0.0004322761291967347,
        "max_grad_norm": 0.5,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 10,
        "n_steps": 32,
        "n_update_epochs": 5,
        "neurons_per_layer": 127,
        "norm_adv": False,
        "num_layers": 2,
        "reduce_lr": False,
        "vf_coef": 0.5,
    },
    'sdt': {
        "adamW": False,
        "critic": 'mlp',
        "depth": 7,
        "ent_coef": 0.0,
        "gae_lambda": 0.9,
        "gamma": 0.999,
        # learning rates
        "learning_rate_actor": 0.0011804116646341413,
        "learning_rate_critic": 0.006327778558167622,
        "max_grad_norm": 0.5,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 27,
        "n_steps": 8,
        "n_update_epochs": 3,
        "norm_adv": True,
        "reduce_lr": False,
        "temperature": 1,
        "vf_coef": 0.5,
    },
    'sympol': {
        "ent_coef": 0.2,
        "gae_lambda": 0.9,
        "gamma": 0.99,
        # one learning rate per actor parameter group, then the critic
        "learning_rate_actor_weights": 0.0002061595611924677,
        "learning_rate_actor_split_values": 0.00017078049184653506,
        "learning_rate_actor_split_idx_array": 0.022591907641836093,
        "learning_rate_actor_leaf_array": 0.0026279101288680462,
        "learning_rate_actor_log_std": 0.03413659356223772,
        "learning_rate_critic": 0.00019794050885091485,
        "max_grad_norm": 1000,
        # rollout settings
        "n_envs": 10,
        "n_steps": 8,
        "n_update_epochs": 5,
        "norm_adv": False,
        "reduce_lr": True,
        "vf_coef": 0.5,
        # fixed model settings
        "SWA": True,
        "adamW": True,
        "dropout": 0.0,
        "depth": 7,
        "minibatch_size": 128,
        "n_estimators": 1,
    },
}


# Fixed hyperparameter sets stored under the name ``pendulum``, one entry per
# model type ('mlp', 'sdt', 'sympol').
pendulum = {
    'mlp': {
        "adamW": False,
        "ent_coef": 0.1,
        "gae_lambda": 0.95,
        "gamma": 0.95,
        # learning rates
        "learning_rate_actor": 0.0011794230987352875,
        "learning_rate_critic": 0.005686601697893695,
        "max_grad_norm": 0.5,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 25,
        "n_steps": 4,
        "n_update_epochs": 5,
        "neurons_per_layer": 125,
        "norm_adv": False,
        "num_layers": 1,
        "reduce_lr": False,
        "vf_coef": 0.25,
    },
    'sdt': {
        "adamW": False,
        "critic": 'mlp',
        "depth": 7,
        "ent_coef": 0.2,
        "gae_lambda": 0.95,
        "gamma": 0.99,
        # learning rates
        "learning_rate_actor": 0.00020307575043049532,
        "learning_rate_critic": 0.0036180353954483593,
        "max_grad_norm": 1,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 17,
        "n_steps": 8,
        "n_update_epochs": 8,
        "norm_adv": False,
        "reduce_lr": False,
        "temperature": 1,
        "vf_coef": 0.75,
    },
    'sympol': {
        "ent_coef": 0,
        "gae_lambda": 0.99,
        "gamma": 0.95,
        # one learning rate per actor parameter group, then the critic
        "learning_rate_actor_weights": 0.04661603302871774,
        "learning_rate_actor_split_values": 0.0005045225867385667,
        "learning_rate_actor_split_idx_array": 0.015939195610212384,
        "learning_rate_actor_leaf_array": 0.0007173630450832438,
        "learning_rate_actor_log_std": 0.00010584674824205176,
        "learning_rate_critic": 0.000742263646155321,
        "max_grad_norm": 1000,
        # rollout settings
        "n_envs": 30,
        "n_steps": 4,
        "n_update_epochs": 5,
        "norm_adv": True,
        "reduce_lr": True,
        "vf_coef": 0.25,
        # fixed model settings
        "SWA": True,
        "adamW": True,
        "dropout": 0.0,
        "depth": 7,
        "minibatch_size": 128,
        "n_estimators": 1,
    },
}

# Fixed hyperparameter sets stored under the name ``mountaincarcontinuous``,
# one entry per model type ('mlp', 'sdt', 'sympol').
mountaincarcontinuous = {
    'mlp': {
        "adamW": False,
        "ent_coef": 0.1,
        "gae_lambda": 0.95,
        "gamma": 0.95,
        # learning rates
        "learning_rate_actor": 0.0002379996023382625,
        "learning_rate_critic": 0.00030040231244994126,
        "max_grad_norm": 1,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 16,
        "n_steps": 4,
        "n_update_epochs": 5,
        "neurons_per_layer": 143,
        "norm_adv": True,
        "num_layers": 2,
        "reduce_lr": False,
        "vf_coef": 0.75,
    },
    'sdt': {
        "adamW": False,
        "critic": 'sdt',
        "depth": 6,
        "ent_coef": 0.2,
        "gae_lambda": 0.99,
        "gamma": 0.95,
        # learning rates
        "learning_rate_actor": 0.00010329480227186826,
        "learning_rate_critic": 0.0019305733819312944,
        "max_grad_norm": 1,
        "minibatch_size": 512,
        # rollout settings
        "n_envs": 25,
        "n_steps": 4,
        "n_update_epochs": 1,
        "norm_adv": True,
        "reduce_lr": False,
        "temperature": 0.05,
        "vf_coef": 0.5,
    },
    'sympol': {
        "ent_coef": 0.5,
        "gae_lambda": 0.99,
        "gamma": 0.95,
        # one learning rate per actor parameter group, then the critic
        "learning_rate_actor_weights": 0.0005410523201481451,
        "learning_rate_actor_split_values": 0.003992569425208814,
        "learning_rate_actor_split_idx_array": 0.0003037261329990236,
        "learning_rate_actor_leaf_array": 0.03264278768895047,
        "learning_rate_actor_log_std": 0.0034840748117048246,
        "learning_rate_critic": 0.00028509099532298435,
        "max_grad_norm": 1000,
        # rollout settings
        "n_envs": 27,
        "n_steps": 8,
        "n_update_epochs": 5,
        "norm_adv": True,
        "reduce_lr": True,
        "vf_coef": 0.5,
        # fixed model settings
        "SWA": True,
        "adamW": True,
        "dropout": 0.0,
        "depth": 7,
        "minibatch_size": 128,
        "n_estimators": 3,
    },
}


# Fixed hyperparameter sets stored under the name ``acrobot``, one entry per
# model type ('mlp', 'sdt', 'sympol').
acrobot = {
    'mlp': {
        "adamW": False,
        "ent_coef": 0.1,
        "gae_lambda": 0.9,
        "gamma": 0.99,
        # learning rates
        "learning_rate_actor": 0.00028637003058499204,
        "learning_rate_critic": 0.002116627771867961,
        "max_grad_norm": 1,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 22,
        "n_steps": 8,
        "n_update_epochs": 5,
        "neurons_per_layer": 18,
        "norm_adv": True,
        "num_layers": 3,
        "reduce_lr": False,
        "vf_coef": 0.75,
    },
    'sdt': {
        "adamW": False,
        "critic": 'mlp',
        "depth": 6,
        "ent_coef": 0.0,
        "gae_lambda": 0.95,
        "gamma": 0.999,
        # learning rates
        "learning_rate_actor": 0.0012338392250876777,
        "learning_rate_critic": 0.00020409039075550196,
        "max_grad_norm": 0.1,
        "minibatch_size": 64,
        # rollout settings
        "n_envs": 23,
        "n_steps": 32,
        "n_update_epochs": 6,
        "norm_adv": True,
        "reduce_lr": False,
        "temperature": 1,
        "vf_coef": 0.5,
    },
    'sympol': {
        "ent_coef": 0,
        "gae_lambda": 0.99,
        "gamma": 0.99,
        # one learning rate per actor parameter group, then the critic
        "learning_rate_actor_weights": 0.04567313750732325,
        "learning_rate_actor_split_values": 0.0001287715529113872,
        "learning_rate_actor_split_idx_array": 0.02552887791546833,
        "learning_rate_actor_leaf_array": 0.02832300149442877,
        "learning_rate_actor_log_std": 0.0421423777398233,
        "learning_rate_critic": 0.0012919593635434088,
        "max_grad_norm": 1000,
        # rollout settings
        "n_envs": 30,
        "n_steps": 8,
        "n_update_epochs": 5,
        "norm_adv": True,
        "reduce_lr": True,
        "vf_coef": 0.5,
        # fixed model settings
        "SWA": True,
        "adamW": True,
        "dropout": 0.0,
        "depth": 7,
        "minibatch_size": 128,
        "n_estimators": 1,
    },
}

# Fixed hyperparameter sets stored under the name ``lunarlander``, one entry
# per model type ('mlp', 'sdt', 'sympol').
lunarlander = {
    'mlp': {
        "adamW": False,
        "ent_coef": 0.2,
        "gae_lambda": 0.95,
        "gamma": 0.999,
        # learning rates
        "learning_rate_actor": 0.0019444728267224967,
        "learning_rate_critic": 0.005004654298152463,
        "max_grad_norm": 0.1,
        "minibatch_size": 128,
        # rollout settings
        "n_envs": 10,
        "n_steps": 8,
        "n_update_epochs": 5,
        "neurons_per_layer": 219,
        "norm_adv": True,
        "num_layers": 2,
        "reduce_lr": False,
        "vf_coef": 0.25,
    },
    'sdt': {
        "adamW": False,
        "critic": 'mlp',
        "depth": 7,
        "ent_coef": 0.0,
        "gae_lambda": 0.99,
        "gamma": 0.999,
        # learning rates
        "learning_rate_actor": 0.002874389925564309,
        "learning_rate_critic": 0.00028788957588120356,
        "max_grad_norm": 0.5,
        "minibatch_size": 512,
        # rollout settings
        "n_envs": 15,
        "n_steps": 4,
        "n_update_epochs": 9,
        "norm_adv": True,
        "reduce_lr": False,
        "temperature": 1,
        "vf_coef": 0.25,
    },
    'sympol': {
        "ent_coef": 0.1,
        "gae_lambda": 0.9,
        "gamma": 0.9,
        # one learning rate per actor parameter group, then the critic
        "learning_rate_actor_weights": 0.0008063943291254216,
        "learning_rate_actor_split_values": 0.001197406456210193,
        "learning_rate_actor_split_idx_array": 0.01595581022164164,
        "learning_rate_actor_leaf_array": 0.04981737842388324,
        "learning_rate_actor_log_std": 0.008544458777287512,
        "learning_rate_critic": 0.0022410797929827957,
        "max_grad_norm": 1000,
        # rollout settings
        "n_envs": 9,
        "n_steps": 32,
        "n_update_epochs": 5,
        "norm_adv": True,
        "reduce_lr": False,
        "vf_coef": 0.75,
        # fixed model settings
        "SWA": True,
        "adamW": True,
        "dropout": 0.0,
        "depth": 7,
        "minibatch_size": 128,
        "n_estimators": 1,
    },
}

