{
    # Model configuration: the custom model's name plus the options passed
    # along with it under "custom_model_config".
    "MODEL_CONFIG": {
        "custom_model": "actor_critic",
        "custom_model_config": {
            "input_conv_channels": 3,
            "critic_share_layers": False,
            # NOTE(review): `null` is used here while "HPO_CONFIG" below uses
            # `None`. Under YAML rules `None` is the plain string "None"; under
            # Python rules `null` is an undefined name. Confirm that the loader
            # maps both spellings to a null value.
            "conv_filters": null,
            # Entries read as [input_size, output_size] pairs: consecutive
            # pairs chain (39 -> 400 -> 400 -> 625 for the actor, -> 1 for the
            # critic). Presumably one pair per fully connected layer; confirm
            # against the model code.
            # NOTE(review): 625 == 5 ** 4, so the actor output width is
            # presumably tied to "discretize_size" (5) over 4 action
            # dimensions. Verify before changing either value.
            "actor_layer_sizes": [[39, 400], [400, 400], [400, 625]],
            "critic_layer_sizes": [[39, 400], [400, 400], [400, 1]],
            "action_masking": True,
            "discretize_size": 5,
            "discretize_mode": "exp"
        },
    },

    # Environment configuration
    "ENV_CONFIG": {
        "advice_mode": "", # allowed values: "", "aa", "eaa", "fixed"
        "config": "ml10_reach-v2",
        "use_rgb": False,
        # NOTE(review): "see_through_walls", "start_rooms", "goal_rooms" and
        # "room_size" read like room/grid-world options, while BASE_CONFIG
        # sets "env" to "metaworld". Presumably they are ignored for this
        # environment; confirm against the environment factory.
        "see_through_walls": True,
        "start_rooms": [[0, 0], [0, 1]],
        "goal_rooms": [[1, 0], [1, 1]],
        "room_size": 3,
        "max_steps": 100,
        "exploration_bonus": False,
    },

    # Base configuration: environment/algorithm selection followed by the
    # algorithm's training parameters.
    "BASE_CONFIG": {
        "env": "metaworld",
        "alg": "ppo",
        # NOTE(review): presumably a device (e.g. GPU) index; confirm how the
        # trainer interprets this value.
        "device": 2, 
        "lr": 0.00005,
        "gamma": 0.9,
        "lambda": 0.8,
        # NOTE(review): 0.0 presumably turns the KL penalty term off; verify
        # against the PPO implementation in use.
        "kl_coeff": 0.0,
        "clip_rewards": False,
        "clip_param": 0.15,
        "vf_clip_param": 10.0,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.01,
        # 256 / 128 gives two minibatches per pass over a train batch,
        # assuming the usual meaning of these two sizes (TODO confirm).
        "train_batch_size": 256,
        "sgd_minibatch_size": 128,
        "num_sgd_iter": 4,
        "num_parallel_envs": 8,
        "norm_adv": False,
        # NOTE(review): this is the only single-quoted string in the file;
        # every other string uses double quotes.
        "lr_scheduler": 'constant'
    },

    # Hyperparameter optimization (HPO) settings; currently unset.
    # NOTE(review): `None` is used here while "conv_filters" in MODEL_CONFIG
    # uses `null`. Under YAML rules `None` is the plain string "None", not a
    # null value; confirm what the loader produces for this key.
    "HPO_CONFIG": None,
    # Example search space, kept commented out. Each value is a string holding
    # a `tune.grid_search(...)` expression.
    # NOTE(review): presumably the consumer evaluates these strings; confirm
    # how, since evaluating expressions taken from a config file is risky.
    # {
    #     "lr": "tune.grid_search([0.001, 0.0005, 0.0001])",
    #     "lambda": "tune.grid_search([0.8, 0.9, 0.99])",
    #     "kl_coeff": "tune.grid_search([0.1, 0.5, 0.9])",
    #     "clip_param": "tune.grid_search([0.1, 0.2, 0.3])",
    #     "vf_loss_coeff": "tune.grid_search([0.5, 0.75, 1.0])",
    #     "entropy_coeff": "tune.grid_search([0.1, 0.01, 0.0])",
    #     "num_sgd_iter": "tune.grid_search([3, 10, 20])",
    # }
}