# Identifier of this configuration.
name: DAIR_lidarcamera_lamma3_pyramid_fusion
# Dataset location and split index files. All three split files live directly
# under data_dir.
data_dir: 'data/DAIR-V2X-C/cooperative-vehicle-infrastructure'
root_dir: 'data/DAIR-V2X-C/cooperative-vehicle-infrastructure/train.json'
validate_dir: 'data/DAIR-V2X-C/cooperative-vehicle-infrastructure/val.json'
# test_dir points at the same val.json as validate_dir.
test_dir: 'data/DAIR-V2X-C/cooperative-vehicle-infrastructure/val.json'

# Name of the parameter-loading routine; presumably resolved by the training
# code -- confirm against the consumer.
yaml_parser: 'load_general_params'
train_params:
  # The &batch_size anchor is defined here but never aliased in this file.
  batch_size: &batch_size 1
  # NOTE(review): the spelling "epoches" is presumably what the consumer looks
  # up -- do not rename without checking.
  epoches: 30
  eval_freq: 2
  save_freq: 2
  max_cav: 2

comm_range: 100
input_source:
  - 'lidar'
  - 'camera'
label_type: 'lidar'
# Shared range, aliased as *cav_lidar throughout the file. Entries 0/3 and 1/4
# equal the xbound / ybound limits of the camera grid_conf, so the layout is
# presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- TODO confirm.
cav_lidar_range: &cav_lidar [-102.4, -51.2, -3.5, 102.4, 51.2, 1.5]

# Per-modality settings: m1 is the lidar branch, m2 the camera branch. The
# anchors defined here are aliased again under model.args and fusion.args.
heter:
  assignment_path: null
  # Only referenced from commented-out lines elsewhere in this file.
  ego_modality: &ego_modality "m1&m2"
  # Identity mapping: each modality name maps to itself.
  mapping_dict:
    m1: m1
    m2: m2
  modality_setting:
    m1:
      sensor_type: &sensor_type_m1 'lidar'
      core_method: &core_method_m1 "point_pillar"
      # model_dir: &model_dir_m1 "opencood/logs/official_checkpoints/HEAL_DAIR/stage1/Pyramid_DAIR_m1_base_2023_08_14_11_42_29/net_epoch_bestval_at23.pth"
      # NOTE(review): freeze is true while model_dir above is commented out;
      # confirm that pretrained m1 weights are loaded some other way.
      freeze: &freeze_m1 true

      # lidar requires preprocess
      preprocess:
        # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor,
        # SpVoxelPreprocessor (the one selected here)
        core_method: 'SpVoxelPreprocessor'
        args:
          # The third entry (5) equals the z extent of cav_lidar_range
          # (1.5 - (-3.5)), assuming that range is ordered min-then-max.
          voxel_size: &voxel_size [0.4, 0.4, 5]
          max_points_per_voxel: 32
          max_voxel_train: 32000
          max_voxel_test: 70000
        # lidar range for each individual cav.
        cav_lidar_range: *cav_lidar

    m2:
      sensor_type: &sensor_type_m2 'camera'
      core_method: &core_method_m2 "lift_splat_shoot"
      # model_dir: &model_dir_m2 "opencood/logs/official_checkpoints/HEAL_DAIR/stage2_and_final_infer/m2_alignto_m1/net_epoch_bestval_at21.pth"
      freeze: &freeze_m2 false

      # Each *bound reads as [min, max, step]; the x/y limits and the 0.4 step
      # match cav_lidar_range and voxel_size above.
      grid_conf: &grid_conf_m2
        xbound: [-102.4, 102.4, 0.4]  # Limit the range of the x direction and divide the grids
        ybound: [-51.2, 51.2, 0.4]  # Limit the range of the y direction and divide the grids
        zbound: [-10, 10, 20.0]  # Limit the range of the z direction and divide the grids
        # presumably depth discretisation settings -- TODO confirm the meaning
        # of the three entries against the camera encoder
        ddiscr: [2, 100, 98]
        mode: 'LID'
      data_aug_conf: &data_aug_conf_m2
        resize_lim: [0.27, 0.28]
        final_dim: [288, 512]
        rot_lim: [0, 0]
        H: 1080
        W: 1920
        # Canonical lowercase boolean; loads as the same value as `False`.
        rand_flip: false
        bot_pct_lim: [0.0, 0.05]
        cams: ['camera0', 'camera1', 'camera2', 'camera3']
        # Equals the number of entries in cams.
        Ncams: 4

# Fusion settings.
fusion:
  core_method: 'intermediate'
  dataset: 'dairv2x'
  args:
    proj_first: false
    # Reuse the camera (m2) grid and augmentation settings defined under
    # heter.modality_setting.m2.
    grid_conf: *grid_conf_m2
    data_aug_conf: *data_aug_conf_m2

# List of augmentation steps. Original note: no use in intermediate fusion.
data_augment:
  - NAME: random_world_flip
    ALONG_AXIS_LIST: ['x']

  - NAME: random_world_rotation
    # 0.78539816 is pi/4 numerically; presumably radians (+/- 45 degrees).
    WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]

  - NAME: random_world_scaling
    WORLD_SCALE_RANGE: [0.95, 1.05]

# Top-level preprocessing. The values are the same as those under
# heter.modality_setting.m1.preprocess.
preprocess:
  # Selected: SpVoxelPreprocessor. Other names listed in the original note:
  # BasePreprocessor, VoxelPreprocessor, BevPreprocessor.
  core_method: "SpVoxelPreprocessor"
  args:
    # Original note: useful.
    voxel_size: *voxel_size
    max_points_per_voxel: 32
    max_voxel_train: 32000
    max_voxel_test: 70000
  # lidar range for each individual cav.
  cav_lidar_range: *cav_lidar

# Anchor-box and post-processing settings.
postprocess:
  # Original note: VoxelPostprocessor, BevPostprocessor supported.
  core_method: "VoxelPostprocessor"
  gt_range: *cav_lidar
  anchor_args:
    cav_lidar_range: *cav_lidar
    l: 3.9
    w: 1.6
    h: 1.56
    # Two yaw entries; `num` below is also 2.
    r: &anchor_yaw [0, 90]
    feature_stride: &feature_stride 2
    num: &anchor_num 2
  target_args:
    pos_threshold: 0.6
    neg_threshold: 0.45
    score_threshold: 0.2
  # Either hwl or lwh.
  order: "hwl"
  # Maximum number of objects in a single frame. Use this number to make sure
  # different frames have the same dimension in the same batch.
  max_num: 150
  nms_thresh: 0.15
  # Aliased again by model.args.dir_args and loss.args.dir.args.
  dir_args: &dir_args
    dir_offset: 0.7853
    num_bins: 2
    anchor_yaw: *anchor_yaw

# model related
model:
  # NOTE(review): this method name says "lamma2" while the top-level `name`
  # and mm_fusion_method below say "lamma3" -- confirm the intended model.
  # The commented-out config later in this file uses heter_model_baseline.
  core_method: point_pillar_lss_lamma2_pyramid_fusion  # heter_model_baseline
  args:
    # ego_modality: *ego_modality
    lidar_range: *cav_lidar
    voxel_size: *voxel_size

    # Lidar branch. Every freeze flag in this branch is true.
    # NOTE(review): model_dir is commented out, so confirm that pretrained
    # weights for this frozen branch are loaded some other way.
    m1:
      core_method: *core_method_m1
      sensor_type: *sensor_type_m1
      # model_dir: *model_dir_m1
      freeze: *freeze_m1

      encoder_args:
        freeze: true
        voxel_size: *voxel_size
        lidar_range: *cav_lidar
        pillar_vfe:
          use_norm: true
          with_distance: false
          use_absolute_xyz: true
          num_filters: [64]
        point_pillar_scatter:
          num_features: 64

      backbone_args:
        freeze: true
        layer_nums: [3]
        layer_strides: [2]
        num_filters: [64]

      aligner_args:
        freeze: true
        core_method: identity

    # Camera branch. Every freeze flag in this branch is false.
    m2:
      core_method: *core_method_m2
      sensor_type: *sensor_type_m2
      # model_dir: *model_dir_m2
      freeze: *freeze_m2

      encoder_args:
        freeze: false
        anchor_number: *anchor_num
        grid_conf: *grid_conf_m2
        data_aug_conf: *data_aug_conf_m2
        img_downsample: 8
        # The &img_feature anchor is not aliased by any active line here.
        img_features: &img_feature 128
        use_depth_gt: false
        depth_supervision: false
        camera_encoder: EfficientNet

      camera_mask_args:
        cav_lidar_range: *cav_lidar
        grid_conf: *grid_conf_m2

      backbone_args:
        freeze: false
        layer_nums: [3]
        layer_strides: [2]
        num_filters: [64]
        # Same value as encoder_args.img_features (128).
        inplanes: 128

      aligner_args:
        freeze: false
        core_method: convnext
        warp_camera_bev: false
        args:
          num_of_blocks: 3
          dim: 64
          warpnet_indim: 128

    single_modality: false  # for inference: lidar camera
    # mm_pooling:
    #   pool_method: 'max'
    #   pool_kernel_size: 2
    mm_fusion_method: 'lamma3'
    lamma:
      freeze: false
      feature_stride: *feature_stride
      layer_num: 2
      # Same value as the m1 / m2 backbone num_filters (64).
      feat_dim: 64
      dim: 128
      heads: 2
      single_mode: false  # lidar camera or false
      random_drop: false  # should be false in inference
      lidar_drop_ratio: 0.5  # useless when not single_mode

    # ma_fusion_method: att
    # att:
    #   feat_dim: 256
    fusion_backbone:
      resnext: true
      layer_nums: [3, 5, 8]
      layer_strides: [1, 2, 2]
      num_filters: [64, 128, 256]
      upsample_strides: [1, 2, 4]
      num_upsample_filter: [128, 128, 128]
      anchor_number: *anchor_num

    # NOTE(review): the spelling "kernal_size" is presumably what the
    # consumer looks up -- do not rename without checking.
    shrink_header:
      kernal_size: [3]
      stride: [1]
      padding: [1]
      dim: [256]
      # Sum of fusion_backbone.num_upsample_filter.
      input_dim: 384  # 128 * 3

    # Same value as shrink_header.dim (256).
    in_head: 256
    anchor_number: *anchor_num
    dir_args: *dir_args
    head_freeze: false
    # head_pretrained: 'lidar'


# # model related
# model:
#   core_method: heter_model_baseline
#   args:
#     ego_modality: *ego_modality
#     lidar_range: *cav_lidar

#     m1:
#       core_method: *core_method_m1
#       sensor_type: *sensor_type_m1

#       encoder_args:
#         voxel_size: *voxel_size
#         lidar_range: *cav_lidar
#         pillar_vfe:
#           use_norm: true
#           with_distance: false
#           use_absolute_xyz: true
#           num_filters: [64]
#         point_pillar_scatter:
#           num_features: 64

#       backbone_args:
#         layer_nums: [3, 5, 8]
#         layer_strides: [2, 2, 2]
#         num_filters: [64, 128, 256]
#         upsample_strides: [1, 2, 4]
#         num_upsample_filter: [128, 128, 128]
  
#       shrink_header: 
#         kernal_size: [ 3 ]
#         stride: [ 1 ]
#         padding: [ 1 ]
#         dim: [ 256 ]
#         input_dim: 384 # 128 * 3

#     m2:
#       core_method: *core_method_m2
#       sensor_type: *sensor_type_m2

#       encoder_args:
#         anchor_number: *anchor_num
#         grid_conf: *grid_conf_m2
#         data_aug_conf: *data_aug_conf_m2
#         img_downsample: 8
#         img_features: &img_feature 128
#         use_depth_gt: false
#         depth_supervision: false
#         camera_encoder: EfficientNet

#       camera_mask_args:
#         cav_lidar_range: *cav_lidar
#         grid_conf: *grid_conf_m2

#       backbone_args:
#         layer_nums: [3, 5, 8]
#         layer_strides: [2, 2, 2]
#         num_filters: [64, 128, 256]
#         upsample_strides: [1, 2, 4]
#         num_upsample_filter: [128, 128, 128]
#         inplanes: 128
  
#       shrink_header: 
#         kernal_size: [ 3 ]
#         stride: [ 1 ]
#         padding: [ 1 ]
#         dim: [ 256 ]
#         input_dim: 384 # 128 * 3

#     fusion_method: att 
#     att:
#       feat_dim: 256

#     in_head: 256
    
#     anchor_number: *anchor_num
#     dir_args: *dir_args

# Loss configuration: one sub-section per term (cls, reg, dir, depth), each
# with its own weight.
loss:
  core_method: point_pillar_loss
  args:
    pos_cls_weight: 2.0
    cls:
      type: "SigmoidFocalLoss"
      alpha: 0.25
      gamma: 2.0
      weight: 1.0
    reg:
      type: "WeightedSmoothL1Loss"
      sigma: 3.0
      codewise: true
      weight: 2.0
    dir:
      type: "WeightedSoftmaxClassificationLoss"
      weight: 0.2
      # Same mapping as postprocess.dir_args.
      args: *dir_args
    # NOTE(review): model.args.m2.encoder_args sets depth_supervision: false;
    # confirm whether this weight has any effect in that case.
    depth:
      weight: 1.0

# Optimizer selection and its keyword arguments.
optimizer:
  core_method: Adam
  lr: 0.002
  args:
    # Exponent-form floats are written with an explicit ".0" mantissa. YAML 1.1
    # resolvers (for example stock PyYAML) only recognise a float when the
    # mantissa contains a dot, so `1e-10` would load as the string "1e-10".
    # The numeric values are unchanged.
    eps: 1.0e-10
    weight_decay: 1.0e-4

# Learning-rate schedule. Original note: step, multistep and Exponential
# support.
lr_scheduler:
  core_method: "multistep"
  gamma: 0.1
  # A list is given for the multistep method; presumably the epochs at which
  # lr is scaled by gamma (train_params.epoches is 30) -- TODO confirm.
  step_size: [5, 25]

