# Experiment configuration written as a Python dict literal: it uses
# True/False/None, `#` comments and trailing commas, so it is NOT strict JSON.
# NOTE(review): presumably loaded with ast.literal_eval or an equivalent
# Python-literal parser rather than a JSON parser -- confirm against the loader.
{
    # Model configuration: names the custom model and carries its settings.
    "MODEL_CONFIG": {
        "custom_model": "actor_critic",
        "custom_model_config": {
            "input_conv_channels": 3,
            "critic_share_layers": False,
            # List of [layer_type, args] pairs, in order.
            # NOTE(review): conv2d args look like
            # [out_channels, kernel_size, stride, padding] and maxpool2d args
            # like [kernel_size, stride] -- confirm against the model builder.
            "conv_filters": [
                ["conv2d", [16, [2, 2], 1, 1]],
                ["relu", []],
                ["maxpool2d", [[2, 2], 2]],
                ["conv2d", [32, [2, 2], 1, 0]],
                ["relu", []],
                ["conv2d", [64, [2, 2], 1, 0]],
                ["relu", []],
            ],
            # NOTE(review): entries look like [in_features, out_features];
            # 576 is presumably the flattened output size of conv_filters and
            # would need to change together with the conv stack or the
            # observation size -- TODO confirm.
            "actor_layer_sizes": [[576, 7]],
            "critic_layer_sizes": [[576, 1]],
            "action_masking": True,
        },
    },

    # Environment configuration
    "ENV_CONFIG": {
        "advice_mode": "", # one of {"", "aa", "eaa", "fixed"}

        # Alternative value for "config", kept for reference:
        # "config": [["wwow", "owwd"], ["wwow", "odww"]],
        "config": [["wwow", "owwo"], ["wwow", "ooww"]],
        "use_rgb": False,
        "see_through_walls": True,
        # NOTE(review): presumably [row, col] indices into the 2x2 "config"
        # grid above -- confirm against the environment.
        "start_rooms": [[0, 0], [0, 1]],
        "goal_rooms": [[1, 0], [1, 1]],
        "room_size": 3,
        "max_steps": 500,
        "exploration_bonus": False,
        "true_goal": "orange",
        "punishment": ["negative_reward"],
    },

    # Base configuration, including algorithm parameters
    "BASE_CONFIG": {
        "env": "two_goal_grid",
        "name": "two_goal_random_orange2",
        "alg": "ppo",
        # NOTE(review): -1 presumably selects CPU -- confirm against the trainer.
        "device": -1,
        "lr": 0.0005,
        "lr_scheduler": 'constant',
        "gamma": 0.9,
        "lambda": 0.8,
        "kl_coeff": 0.0,
        "clip_rewards": False,
        "clip_param": 0.2,
        "vf_clip_param": 10.0,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.01,
        "train_batch_size": 256,
        "sgd_minibatch_size": 128,
        "num_sgd_iter": 4,
        "num_parallel_envs": 8,
        "norm_adv": False
    },

    # Hyperparameter optimization (HPO) settings. Set to None here; an example
    # search space is kept commented out below.
    # NOTE(review): the example values are strings containing Python
    # expressions, so the consumer presumably evaluates them -- confirm, and
    # treat this file as trusted input if so.
    "HPO_CONFIG": None,
    # {
    #     "lr": "tune.grid_search([0.001, 0.0005, 0.0001])",
    #     "lambda": "tune.grid_search([0.8, 0.9, 0.99])",
    #     "kl_coeff": "tune.grid_search([0.1, 0.5, 0.9])",
    #     "clip_param": "tune.grid_search([0.1, 0.2, 0.3])",
    #     "vf_loss_coeff": "tune.grid_search([0.5, 0.75, 1.0])",
    #     "entropy_coeff": "tune.grid_search([0.1, 0.01, 0.0])",
    #     "num_sgd_iter": "tune.grid_search([3, 10, 20])",
    # }
}