{
    # Experiment configuration written as a Python literal: it uses
    # True/False/None, '#' comments, single-quoted strings and trailing commas,
    # so a strict JSON parser will reject this file as it stands.

    # Model selection and architecture parameters.
    "MODEL_CONFIG": {
        "custom_model": "actor_critic",
        "custom_model_config": {
            "input_conv_channels": 25,
            "critic_share_layers": False,
            "use_leaky_relu": True,
            "use_d2rl": False,
            # Ordered list of [layer_type, args] entries, alternating conv2d and
            # leaky_relu (which takes an empty args list).
            # NOTE(review): conv2d args look like
            # [out_channels, [kernel_h, kernel_w], stride, padding] -- confirm
            # against the code that builds the model.
            "conv_filters": [
                [ "conv2d", [ 25, [ 5, 5 ], 1, 2 ] ], # same: presumably the spatial size is preserved -- confirm
                [ "leaky_relu", [ ] ],
                [ "conv2d", [ 25, [ 3, 3 ], 1, 1 ] ], # same: presumably the spatial size is preserved -- confirm
                [ "leaky_relu", [ ] ],
                [ "conv2d", [ 25, [ 3, 3 ], 1, 0] ], # reduced: presumably the spatial size shrinks here -- confirm
                [ "leaky_relu", [ ]]
            ],

            # Actor head as a list of two-element rows; the second number of each
            # row equals the first number of the next (257 -> 64 -> 64 -> 64 -> 6).
            # NOTE(review): rows are presumably [in_features, out_features], and
            # 257 presumably has to match the size of the features fed to the
            # head -- confirm before changing conv_filters.
            "actor_layer_sizes": [
                [ 257, 64],
                [ 64,  64],
                [ 64,  64],
                [ 64,  6],
            ],
            # Critic head, same row format as the actor head but ending in a
            # single output (257 -> 64 -> 64 -> 64 -> 1).
            "critic_layer_sizes": [
                [ 257, 64 ],
                [ 64,  64 ],
                [ 64,  64 ],
                [ 64,  1],
            ],
            "action_masking": False
        }
    },
    # Environment configuration
    "ENV_CONFIG": {
        "layout_name": "forced_room_ot", # the layout name as defined in the overcook repo
        # NOTE(review): horizon and max_steps carry the same value (400);
        # confirm whether both are read and whether they must stay in sync.
        "horizon": 400,
        "max_steps": 400,
        'use_rgb': False,
        'reward_shaping_horizon': 20000000,
        'random_layout': True,
        'random_recipe': True
    },

    # Base configuration including algorithm parameters
    "BASE_CONFIG": {
        "env": "single_cook",
        "name": "forced_room_ot", # same string as ENV_CONFIG["layout_name"]
        "alg": "ppo",
        "device": -1, # NOTE(review): -1 presumably selects CPU -- confirm
        "lr": 0.0006,
        "lr_scheduler": 'constant',
        "gamma": 0.99,
        "lambda": 0.6,
        "kl_coeff": 0.0,
        "clip_rewards": False,
        "clip_param": 0.2,
        "vf_clip_param": 10.0,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.1,
        # 9600 == num_parallel_envs (24) * ENV_CONFIG horizon (400), and
        # 9600 / sgd_minibatch_size (1600) == 6.
        # NOTE(review): presumably these are meant to stay in step -- confirm
        # before changing any one of them.
        "train_batch_size": 9600,
        "sgd_minibatch_size": 1600,
        "num_sgd_iter": 8,
        "num_parallel_envs": 24,
        "norm_adv": False
    },

    # Hyperparameter optimization settings. Currently None; the commented-out
    # dict below is an example search space whose values are strings holding
    # tune.grid_search(...) expressions.
    "HPO_CONFIG": None,
    # {
    #     "lr": "tune.grid_search([0.001, 0.0005, 0.0001])",
    #     "lambda": "tune.grid_search([0.8, 0.9, 0.99])",
    #     "kl_coeff": "tune.grid_search([0.1, 0.5, 0.9])",
    #     "clip_param": "tune.grid_search([0.1, 0.2, 0.3])",
    #     "vf_loss_coeff": "tune.grid_search([0.5, 0.75, 1.0])",
    #     "entropy_coeff": "tune.grid_search([0.1, 0.01, 0.0])",
    #     "num_sgd_iter": "tune.grid_search([3, 10, 20])",
    # }
}