{
    "General": {
        "policy_approx": "SDT"
    },
    "CartPole-v1": {
        "learner_args": {
            "input_dim": 4,
            "output_dim": 2,
            "depth": 3,
            "lamda": 1e-3,
            "lr": 1e-3,
            "weight_decay": 0.0,
            "batch_size": 1280,
            "epochs": 40,
            "device": "cuda",
            "log_interval": 100,
            "exp_scheduler_gamma": 1.0,
            "beta" : 0,
            "l1_regularization": 0,
            "greatest_path_probability": 1
        },
        "alg_confs": {
            "learning_rate" : 0.0005,
            "gamma"         : 0.98,
            "lmbda"         : 0.95,
            "eps_clip"      : 0.1,
            "K_epoch"       : 3,
            "hidden_dim"    : 128
        },
        "train_confs": {
            "episodes"      : 3000,
            "t_horizon"     : 1000,
            "model_path"    : "../data/sdt/model/cartpole/rl_ppo",
            "log_path"      : "../data/sdt/log/cartpole/rl_reward"
        }
    },

    "LunarLander-v2": {
        "learner_args": {
            "input_dim": 8,
            "output_dim": 4,
            "depth": 4,
            "lamda": 1e-3,
            "lr": 1e-3,
            "weight_decay": 0.0,
            "batch_size": 1280,
            "epochs": 40,
            "device": "cuda",
            "log_interval": 100,
            "exp_scheduler_gamma": 1.0,
            "beta" : 0,
            "l1_regularization": 0,
            "greatest_path_probability": 1
        },
        "alg_confs": {
            "learning_rate" : 0.0005,
            "gamma"         : 0.98,
            "lmbda"         : 0.95,
            "eps_clip"      : 0.1,
            "K_epoch"       : 3,
            "hidden_dim"    : 128
        },
        "train_confs": {
            "episodes"      : 5000,
            "t_horizon"     : 1000,
            "model_path"    : "../data/sdt/model/lunarlander/rl_ppo",
            "log_path"    : "../data/sdt/log/lunarlander/rl_reward"
        }

    },

    "MountainCar-v0": {
        "learner_args": {
            "input_dim": 2,
            "output_dim": 3,
            "depth": 3,
            "lamda": 1e-3,
            "lr": 1e-3,
            "weight_decay": 0.0,
            "batch_size": 128,
            "epochs": 40,
            "device": "cuda",
            "log_interval": 100,
            "exp_scheduler_gamma": 1.0,
            "beta" : 0,
            "l1_regularization": 0,
            "greatest_path_probability": 1
        },
        "alg_confs": {
            "learning_rate" : 0.005,
            "gamma"         : 0.999,
            "lmbda"         : 0.98,
            "eps_clip"      : 0.1,
            "K_epoch"       : 10,
            "hidden_dim"    : 32
        },
        "train_confs": {
            "episodes"      : 5000,
            "t_horizon"     : 1000,
            "model_path"    : "../data/sdt/model/mountaincar/rl_ppo",
            "log_path"    : "../data/sdt/log/mountaincar/rl_reward"
        }

    }
}
