######## preprocess lidar #########
# Six-number extent of the lidar point cloud. It is aliased as `pc_range`
# further down and handed to the Gaussian encoder modules.
# NOTE(review): presumably ordered [x_min, y_min, z_min, x_max, y_max, z_max]
# — confirm against the consumer.
point_cloud_range = [
    -32.0,
    -32.0,
    -2,
    32.0,
    32.0,
    8.0,
]
# Three-number voxel size. The last entry (10) equals 8.0 - (-2) from the
# range above. NOTE(review): presumably one voxel spans the whole height —
# confirm.
voxel_size = [
    0.25,
    0.25,
    10,
]
# Not referenced again in the visible part of this config.
# NOTE(review): presumably read by the lidar preprocessing code — confirm.
num_point_features: int = 3
num_filters = [32, 32]
# 256 also appears as H and W in `gaussian_decoder["cuda_kwargs"]`, and equals
# (32.0 - (-32.0)) / 0.25 from the two settings above.
bev_occ_size = [256, 256]
# Shared feature width: used as `out_channels` of both FPN necks and as
# `embed_dims` of every Gaussian module configured below.
embed_dims = 128
###########image backbone##############
# Backbone for the camera image branch. `init_cfg` initialises it from the
# "torchvision://resnet34" checkpoint.
# `out_indices` selects three stages, the same count as the three
# `in_channels` entries of `img_neck`.
# NOTE(review): `frozen_stages=1` presumably freezes the stem and the first
# stage, as in the mmdet ResNet — confirm against the registry that resolves
# type="ResNet".
img_backbone = {
    "type": "ResNet",
    "init_cfg": {
        "type": "Pretrained",
        "checkpoint": "torchvision://resnet34",
    },
    "depth": 34,
    "num_stages": 4,
    "out_indices": (1, 2, 3),
    "frozen_stages": 1,
    "norm_cfg": {"type": "BN2d", "requires_grad": True},
    # "norm_eval": False,  (left disabled by the author)
    "style": "pytorch",
}
############# image fpn ###########
# Number of feature levels the neck emits (`num_outs`). The same name is
# assigned the same value again before `pts_neck`.
level_num = 3
# FPN on top of `img_backbone`: three input widths, every output level
# projected to `embed_dims` channels.
img_neck = {
    "type": "FPN",
    "in_channels": [128, 256, 512],  # [216, 576, 1512],
    "out_channels": embed_dims,
    "start_level": 0,
    "add_extra_convs": "on_output",
    "num_outs": level_num,
    "relu_before_extra_convs": True,
}
###########lidar backbone##############
# Backbone for the point-cloud (`pts`) branch. It mirrors `img_backbone`
# except that it takes a single input channel (`in_channels=1`) and declares
# no `init_cfg`, so no pretrained checkpoint is configured here.
pts_backbone = {
    "type": "ResNet",
    "depth": 34,
    "in_channels": 1,
    "num_stages": 4,
    "out_indices": (1, 2, 3),
    "frozen_stages": 1,
    "norm_cfg": {"type": "BN2d", "requires_grad": True},
    # "norm_eval": False,  (left disabled by the author)
    "style": "pytorch",
}
####### lidar fpn ###########
# Re-assigned with the same value used for `img_neck`, so both necks emit the
# same number of levels.
level_num = 3
# FPN on top of `pts_backbone`; the settings are identical to `img_neck`.
pts_neck = {
    "type": "FPN",
    "in_channels": [128, 256, 512],  # [216, 576, 1512],
    "out_channels": embed_dims,
    "start_level": 0,
    "add_extra_convs": "on_output",
    "num_outs": level_num,
    "relu_before_extra_convs": True,
}
############## Gaussian Initializer #########
# `embed_dims` is reused from the lidar preprocessing section; it is not
# redefined here.

# Flags forwarded unchanged to the initializer, the anchor encoder, the
# refinement layer and (for `include_ele`) the decoder.
include_opa = False
include_ele = False
semantics = True
# Also used as `num_classes` of `gaussian_decoder`.
semantic_dim = 11
# Two-number range passed to both key-point generators and the refine layer.
scale_range = [
    0.01,
    3.2,
]
# Passed as `xyz_coordinate` to the key-point generators and as
# `xy_coordinate` to the refine layer.
xy_coordinate = "cartesian"
phi_activation = "sigmoid"
# Number of times the per-decoder `operation_order` is repeated.
num_decoder = 4
# Short alias: `pc_range` is the very same list object as `point_cloud_range`.
pc_range = point_cloud_range

# Settings for the "GaussianInit" module. The semantic / opacity / elevation
# flags are forwarded from the hyper-parameters above so that this module and
# the encoder are configured with the same values.
gaussian_init = {
    "type": "GaussianInit",
    "num_anchor": 0,
    "embed_dims": embed_dims,
    "anchor_grad": False,
    "feat_grad": False,
    "semantics": semantics,
    "semantic_dim": semantic_dim,
    "include_opa": include_opa,
    "include_ele": include_ele,
    "projection_in": None,
    "random_samples": 512,
}
############# Gaussian Encoder ##############
# Settings for the "GaussianEncoder" module: one sub-config per operation plus
# the order in which the operations run.
#
# `_delete_` is the OpenMMLab config marker that makes a dict replace, rather
# than merge with, the same key inherited from a `_base_` config.
# NOTE(review): no `_base_` is visible in this file — confirm one exists,
# otherwise the `_delete_` entries may reach the module constructors as-is.
gaussian_encoder = {
    "type": "GaussianEncoder",
    "anchor_encoder": {
        "type": "SparseGaussian3DEncoder",
        "embed_dims": embed_dims,
        "include_opa": include_opa,
        "include_ele": include_ele,
        "semantics": semantics,
        "semantic_dim": semantic_dim,
    },
    "norm_layer": {"type": "LN", "normalized_shape": embed_dims},
    "ffn": {
        "_delete_": True,
        "type": "AsymmetricFFN",
        "in_channels": embed_dims,
        "embed_dims": embed_dims,
        "feedforward_channels": 4 * embed_dims,
        "ffn_drop": 0.1,
        "add_identity": False,
    },
    # Aggregation over the point-cloud feature levels.
    "deformable_model_pts": {
        "type": "DeformableFeatureAggregation_LiDAR",
        "embed_dims": embed_dims,
        "num_groups": 4,
        "num_levels": 3,
        "num_cams": 1,
        "attn_drop": 0.15,
        "use_deformable_func": True,
        "use_camera_embed": False,
        "residual_mode": "none",
        "kps_generator": {
            "type": "SparseGaussianKeyPointsGenerator_LiDAR",
            "embed_dims": embed_dims,
            "phi_activation": phi_activation,
            "xyz_coordinate": xy_coordinate,
            "num_learnable_pts": 6,
            # Five fixed 2-D offsets: the origin and +/-0.45 along each axis.
            "fix_scale": [[0, 0], [0.45, 0], [-0.45, 0], [0, 0.45], [0, -0.45]],
            "pc_range": pc_range,
            "scale_range": scale_range,
        },
    },
    # Aggregation over the image feature levels. Same settings as
    # `deformable_model_pts`; only the two `type` strings differ.
    "deformable_model_img": {
        "type": "DeformableFeatureAggregation_CAM",
        "embed_dims": embed_dims,
        "num_groups": 4,
        "num_levels": 3,
        "num_cams": 1,
        "attn_drop": 0.15,
        "use_deformable_func": True,
        "use_camera_embed": False,
        "residual_mode": "none",
        "kps_generator": {
            "type": "SparseGaussianKeyPointsGenerator_CAM",
            "embed_dims": embed_dims,
            "phi_activation": phi_activation,
            "xyz_coordinate": xy_coordinate,
            "num_learnable_pts": 6,
            "fix_scale": [[0, 0], [0.45, 0], [-0.45, 0], [0, 0.45], [0, -0.45]],
            "pc_range": pc_range,
            "scale_range": scale_range,
        },
    },
    "refine_layer": {
        "type": "SparseGaussianRefinementModuleV2",
        "embed_dims": embed_dims,
        "pc_range": pc_range,
        "scale_range": scale_range,
        "restrict_xy": False,
        "unit_xy": [4.0, 4.0],
        "refine_manual": None,
        "phi_activation": phi_activation,
        "semantics": semantics,
        "semantic_dim": semantic_dim,
        "include_opa": include_opa,
        "include_ele": include_ele,
        "xy_coordinate": xy_coordinate,
        "semantics_activation": "identity",
    },
    # Disabled alternative for the "spconv" step, kept for reference:
    # "spconv_layer": {
    #     "_delete_": True,
    #     "type": "SparseConv2D",
    #     "in_channels": embed_dims,
    #     "embed_channels": embed_dims,
    #     "pc_range": pc_range,
    #     "grid_size": [2.0, 2.0],
    #     "phi_activation": phi_activation,
    #     "xyz_coordinate": xy_coordinate,
    #     "use_out_proj": True,
    #     "use_multi_layer": True,
    # },
    # Active "spconv" step: despite the key name it is configured with the
    # "GaussianAttention" type.
    "spconv_layer": {
        "_delete_": True,
        "type": "GaussianAttention",
        "embed_dims": embed_dims,
        "num_head": 4,
        "dropout": 0.15,
        "batch_first": True,
    },
    "implicit_fusion": {
        "_delete_": True,
        "type": "ImplicitFlattenFusion",
        "embed_dims": embed_dims,
        "num_groups": 4,
        "attn_drop": 0.15,
        "img_feature_size": [1, 12, 32],
        "pts_feature_size": [8, 8],
    },
    "num_decoder": num_decoder,
    # One decoder layer is the 22-step sequence below; the flat list repeats
    # it `num_decoder` times.
    "operation_order": num_decoder
    * (
        ["identity", "deformable_pts", "add", "norm"]
        # + ["identity", "ffn", "add", "norm"]  (disabled by the author)
        + ["identity", "deformable_img", "add", "norm"]
        + ["identity", "ffn", "add", "norm"]
        + ["identity", "spconv", "add", "norm"]
        + ["identity", "ffn", "add", "norm"]
        + ["implicit_fusion", "refine"]
    ),
}
# Settings for the "GaussianDecoder" module. `num_classes` follows
# `semantic_dim`, so the decoder and encoder use the same class count.
gaussian_decoder = {
    "type": "GaussianDecoder",
    "apply_loss_type": "random_1",
    "num_classes": semantic_dim,
    "empty_args": {
        # "_delete_": True,
        "mean": [0, 0, -1.0],
        "scale": [100, 100, 8.0],
    },
    "with_empty": False,
    "include_ele": include_ele,
    "use_localaggprob": True,
    "use_localaggprob_fast": True,
    "combine_geosem": True,
    # The literals below repeat values defined at the top of the file:
    # H and W equal the entries of `bev_occ_size`, `grid_size` equals
    # `voxel_size[0]`, and the x/y of `pc_min` equal those of
    # `point_cloud_range`. The z of `pc_min` (0.0) differs from
    # `point_cloud_range[2]` (-2).
    # NOTE(review): presumably these must be kept in sync by hand — confirm,
    # and confirm the z difference is intentional.
    "cuda_kwargs": {
        # "_delete_": True,
        "scale_multiplier": 4,
        "H": 256,
        "W": 256,
        "D": 1,
        "pc_min": [-32.0, -32.0, 0.0],
        "grid_size": 0.25,
    },
}
