# icml paper: ...
def _mlp_spec() -> dict:
    """Build the MLP description used for both the actor and the critic.

    The description has two hidden layers of 64 ReLU units each and no
    activation on the output. Every call creates brand-new containers, so
    the actor and critic entries share no mutable state.
    """
    hidden_layers = [{"units": 64, "activation": "ReLU"} for _ in range(2)]
    return {
        "network": "mlp",
        "layers": hidden_layers,
        "output": {"activation": False},
    }


# NOTE(review): the flat keys below are commented out, so they are NOT part of
# the config. Delete them once they are confirmed to be obsolete.
# "lr": 3e-3,
# "discount": 0.99,  # gamma
# "tau": 0.01,  # for soft update
# "target_update_interval": 1,
# "action_prior": "uniform",
# "reparameterize": False,
# "use_attention": False,  # valid only multi-agent case

# Optimisation settings: learning rates, batch size and update schedule.
_TRAINING_CONFIG = {
    "actor_lr": 3e-4,
    "critic_lr": 3e-4,
    "alpha_lr": 3e-4,
    "update_interval": 1,
    "use_multihead": False,
    "optimizer": "Adam",
    "batch_size": 64,
    "gamma": 0.95,
    "tau": 0.01,
    # NOTE(review): "auto_alpha" is also set under "model_config"; which of
    # the two copies is consulted is not visible here -- keep them in sync.
    "auto_alpha": False,
    "grad_norm_clipping": 0.5,
}

# Network descriptions plus the entropy-weight (alpha) settings.
_MODEL_CONFIG = {
    "actor": _mlp_spec(),
    "critic": _mlp_spec(),
    "auto_alpha": False,
    # Initial value to use for the entropy weight alpha.
    "initial_alpha": 1.0,
    # Target entropy lower bound. If None, it will be set to -|A| for
    # continuous action spaces and to 0.98 * -log(1/|A|) for discrete ones
    # (e.g. 0.98 * -log(1/2) for Discrete(2), -3.0 for Box(shape=(3,))).
    # This is the inverse of reward scale, and will be optimized
    # automatically.
    "target_entropy": None,
}

# Default SAC settings: dotted names of the trainer and loss classes, followed
# by the training, model, custom and resource sections.
DEFAULT_CONFIG = {
    "trainer": "sac.trainer.SACTrainer",
    "loss": "sac.loss.SACLoss",
    "training_config": _TRAINING_CONFIG,
    "model_config": _MODEL_CONFIG,
    "custom_config": {"use_cuda": False},
    "resource_config": {},
}
