from ml_collections import config_dict


def get_config(algorithm_name):
    """Build the default configuration for a training run.

    A fresh ``ConfigDict`` is created on every call and populated with
    hard-coded default values; the only value taken from the caller is the
    algorithm name.

    Args:
        algorithm_name: Stored verbatim under ``name``.

    Returns:
        The populated ``config_dict.ConfigDict``.

    Note:
        ``beta`` is computed once, here, as ``1 / entropy_coef``. It is stored
        as a plain float, so overriding ``entropy_coef`` afterwards does not
        change ``beta``.
    """
    cfg = config_dict.ConfigDict()

    # --- Run identity and hardware ---
    cfg.name = algorithm_name
    cfg.device = "gpu"  # cpu, gpu

    # --- Core training settings ---
    # NOTE(review): the names look like on-policy actor-critic (PPO-style)
    # hyperparameters -- confirm against the code that consumes them.
    cfg.total_timesteps = 1e9
    cfg.learning_rate = 3e-4
    cfg.anneal_learning_rate = False
    cfg.nr_steps = 2048
    cfg.nr_epochs = 10
    cfg.minibatch_size = 64
    cfg.gamma = 0.99
    cfg.gae_lambda = 0.95
    cfg.clip_range = 0.2
    cfg.entropy_coef = 0.001
    cfg.critic_coef = 0.5
    cfg.max_grad_norm = 0.5
    cfg.std_dev = 1.0
    cfg.nr_hidden_units = 256
    cfg.evaluation_frequency = 204800  # -1 to disable
    cfg.evaluation_episodes = 10

    # --- "disc" settings and expert data ---
    # NOTE(review): "disc" presumably abbreviates "discriminator" -- confirm.
    cfg.nr_hidden_units_disc = 64
    cfg.learning_rate_disc = 1e-5
    cfg.nr_epochs_disc = 1  # Number of disc epochs
    cfg.env_reward_frac = 0.0
    cfg.data_path = "../expert_data/ant/PPO_expert_30_episodes.npz"
    cfg.handle_absorbing_states = True
    cfg.gp_lambda = 0.1
    cfg.gp_alpha = 0.5

    # --- Reward variant selection ---
    # options: state-action, state-based, shaped, shaped-sa, uncorrelated
    cfg.reward_type = 'state-action'
    # options: state-action, state-based, shaped, shaped-sa
    cfg.reward_approximator_type = 'state-action'
    cfg.epsilon = 0.1
    cfg.disc_buffer_capacity = 35
    cfg.uncorrelated_disc = False
    cfg.init_eta = 2.0
    cfg.const_eta = False
    # Reciprocal of the entropy coefficient set above, evaluated eagerly.
    cfg.beta = float(1 / cfg.entropy_coef)

    # --- Reward-function approximator ---
    cfg.nr_epochs_rew = 30
    cfg.learning_rate_reward_fn = 5.7797838797843984e-05
    cfg.reward_fn_approximator = False

    # --- Experiment switches ---
    cfg.global_rew_experiment = False
    cfg.subsampling_cutoff = 1

    return cfg
