{
    "General": {
        "policy_approx" : "MLP"
    },
    "CartPole-v1": {
        "learner_args": {
            "device": "cuda"
        },
        "alg_confs": {
            "learning_rate" : 0.0005,
            "gamma"         : 0.98,
            "lmbda"         : 0.95,
            "eps_clip"      : 0.1,
            "K_epoch"       : 3,
            "hidden_dim"    : 128
        },
        "train_confs": {
            "episodes"      : 3000,
            "t_horizon"     : 1000,
            "model_path"    : "../data/mlp/model/cartpole/ppo",
            "log_path"      : "../data/mlp/log/cartpole/reward"
        }
    },

    "LunarLander-v2": {
        "learner_args": {
            "device": "cuda"
        },
        "alg_confs": {
            "learning_rate" : 0.0005,
            "gamma"         : 0.98,
            "lmbda"         : 0.95,
            "eps_clip"      : 0.1,
            "K_epoch"       : 3,
            "hidden_dim"    : 128
        },
        "train_confs": {
            "episodes"      : 5000,
            "t_horizon"     : 1000,
            "model_path"    : "../data/mlp/model/lunarlander/ppo",
            "log_path"    : "../data/mlp/log/lunarlander/reward"
        }
    },

    "MountainCar-v0": {
        "learner_args": {
            "device": "cuda"
        },
        "alg_confs": {
            "learning_rate" : 0.005,
            "gamma"         : 0.999,
            "lmbda"         : 0.98,
            "eps_clip"      : 0.1,
            "K_epoch"       : 10,
            "hidden_dim"    : 32
        },
        "train_confs": {
            "episodes"      : 5000,
            "t_horizon"     : 1000,
            "model_path"    : "../data/mlp/model/mountaincar/ppo",
            "log_path"    : "../data/mlp/log/mountaincar/reward"
        }
    },

    "Acrobot-v1": {
        "learner_args": {
            "device": "cuda"
        },
        "alg_confs": {
            "learning_rate" : 0.0005,
            "gamma"         : 0.98,
            "lmbda"         : 0.95,
            "eps_clip"      : 0.1,
            "K_epoch"       : 3,
            "hidden_dim"    : 128
        },
        "train_confs": {
            "episodes"      : 7000,
            "t_horizon"     : 1000,
            "model_path"    : "../data_no_norm/mlp/model/acrobot/ppo",
            "log_path"    : "../data_no_norm/mlp/log/acrobot/reward"
        }
    }
    
    

}
