# name for logging
name: Pyramid_m1m2m3m4
# data directory for training data
root_dir: 'dataset/OPV2V/train'
# data directory for validation data
validate_dir: 'dataset/OPV2V/validate'
# data directory for testing data
test_dir: 'dataset/OPV2V/test'

# parser used for this yaml; suitable for most cases
yaml_parser: 'load_general_params'
train_params:
  batch_size: &batch_size 1
  epoches: 50
  # evaluate every 2 epochs
  eval_freq: 2
  # save every 2 epochs
  save_freq: 2
  # max agent number in a scene
  max_cav: 5

comm_range: 70

# Choose the data to load according to your modality setting.
input_source:
  - 'lidar'
  # 'camera' is not viable with the V2X-Sim dataset
  - 'camera'
  # 'depth' is only viable with the OPV2V-H dataset
  - 'depth'

# 'lidar' or 'camera'. 'lidar' includes more gt boxes than 'camera'.
# When the heterogeneous dataloader is used, i.e.
# yaml['fusion']['core_method'] = 'intermediateheter' or 'lateheter':
#   - in training, each agent adopts its own label_type, depending on its modality;
#   - in testing, all agents adopt the 'lidar' label.
label_type: 'lidar'


# [xmin, ymin, zmin, xmax, ymax, zmax]; aliased as *cav_lidar throughout this file
cav_lidar_range: &cav_lidar [-102.4, -102.4, -3, 102.4, 102.4, 1]

# additional data extension for loading. ['bev_visibility.png'] for camera API.
add_data_extension:
  - 'bev_visibility.png'


# Heterogeneous-agent configuration. The anchors defined under
# modality_setting (&sensor_type_*, &core_method_*, &voxel_size*, &grid_conf_*,
# &data_aug_conf_*) are referenced again through aliases in the `model` section,
# so renaming an anchor here requires renaming its aliases there.
heter: # heterogeneous setting
  assignment_path: "opencood/logs/heter_modality_assign/opv2v_4modality.json" 
  ego_modality: &ego_modality "m1&m2&m3&m4" # agent types that are allowed to become ego, separated by '&'.
  lidar_channels_dict: # lidar channels for each agent type. If omitted, 64-channel lidar data are used for OPV2V.
    m3: 32
  mapping_dict: # Use the following dictionary to map the agent type set in 'assignment_path' to the agent type actually assigned in the experiment.
    # identity mapping here: every agent type keeps its assigned type
    m1: m1
    m2: m2
    m3: m3
    m4: m4
  modality_setting: # assign each agent type's modality.
    # m1: lidar agent, encoded with point_pillar
    m1:
      sensor_type: &sensor_type_m1 'lidar'
      core_method: &core_method_m1 "point_pillar"

      # lidar requires preprocess
      preprocess:
        core_method: 'SpVoxelPreprocessor'
        args:
          # the z size (4) equals the full z extent of cav_lidar_range (-3 to 1),
          # i.e. a single voxel layer along z
          voxel_size: &voxel_size [0.4, 0.4, 4]
          max_points_per_voxel: 32
          max_voxel_train: 32000
          max_voxel_test: 70000
        # lidar range for each individual cav.
        cav_lidar_range: *cav_lidar
    # m2: camera agent, encoded with lift_splat_shoot
    m2:
      sensor_type: &sensor_type_m2 'camera'
      core_method: &core_method_m2 "lift_splat_shoot"

      grid_conf: &grid_conf_m2
        xbound: [-51.2, 51.2, 0.4]   # Limit the range of the x direction and divide the grids
        ybound: [-51.2, 51.2, 0.4]   # Limit the range of the y direction and divide the grids
        zbound: [-10, 10, 20.0]   # Limit the range of the z direction and divide the grids
        # NOTE(review): presumably the depth discretization as [min, max, number of bins] -- confirm against the LSS encoder
        ddiscr: [2, 50, 48]
        mode: 'LID'
      data_aug_conf: &data_aug_conf_m2
        resize_lim: [0.65, 0.7]
        final_dim: [384, 512]
        rot_lim: [-3.6, 3.6]
        # NOTE(review): H/W are presumably the raw image height/width in pixels -- confirm
        H: 600
        W: 800
        rand_flip: False
        bot_pct_lim: [0.0, 0.05]
        cams: ['camera0', 'camera1', 'camera2', 'camera3']
        Ncams: 4

    # m3: lidar agent, encoded with second; uses 32-channel lidar (see lidar_channels_dict)
    m3:
      sensor_type: &sensor_type_m3 'lidar'
      core_method: &core_method_m3 "second"

      # lidar requires preprocess
      preprocess:
        # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
        # (SpVoxelPreprocessor is the one selected here)
        core_method: 'SpVoxelPreprocessor'
        args:
          voxel_size: &voxel_size_m3 [0.1, 0.1, 0.1]
          max_points_per_voxel: 5
          max_voxel_train: 32000
          max_voxel_test: 70000
        # lidar range for each individual cav.
        cav_lidar_range: *cav_lidar

    # m4: camera agent, encoded with lift_splat_shoot. Its grid_conf has the same
    # values as m2's; data_aug_conf differs from m2 only in resize_lim and final_dim.
    m4:
      sensor_type: &sensor_type_m4 'camera'
      core_method: &core_method_m4 "lift_splat_shoot"

      grid_conf: &grid_conf_m4
        xbound: [-51.2, 51.2, 0.4]   # Limit the range of the x direction and divide the grids
        ybound: [-51.2, 51.2, 0.4]   # Limit the range of the y direction and divide the grids
        zbound: [-10, 10, 20.0]   # Limit the range of the z direction and divide the grids
        ddiscr: [2, 50, 48]
        mode: 'LID'
      data_aug_conf: &data_aug_conf_m4
        resize_lim: [0.56, 0.61]
        final_dim: [336, 448]
        rot_lim: [-3.6, 3.6]
        H: 600
        W: 800
        rand_flip: False
        bot_pct_lim: [0.0, 0.05]
        cams: ['camera0', 'camera1', 'camera2', 'camera3']
        Ncams: 4

# Fusion strategy and dataset selection.
fusion:
  core_method: 'intermediateheter' 
  # if 'heter' is in the yaml, 'intermediateheter' or 'lateheter' is supported
  # if 'heter' is not in the yaml, 'intermediate', 'late' or 'early' is supported.
  #     See those (old-style but also compatible) yaml files in CoAlign's repo: https://github.com/yifanlu0227/CoAlign
  dataset: 'opv2v' # or 'v2xset', 'v2xsim', 'dairv2x'
  args: 
    proj_first: false # 'false' is one-round communication. 'true' is two-round communication that sends the pose first (considered very impractical)
    # NOTE(review): a plain `None` is not a YAML null; YAML loaders read it as the
    # string "None". Harmless if these keys are truly unused place-holders --
    # confirm against the dataset code before relying on a null check.
    grid_conf: None # place-holder
    data_aug_conf: None # place-holder


# Top-level preprocess section. Per the inline notes, only voxel_size is
# meaningful here; the remaining args are place-holders. The per-agent-type
# values live under heter.modality_setting.<type>.preprocess.
preprocess:
  # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor
  # (SpVoxelPreprocessor is the one selected here)
  core_method: 'SpVoxelPreprocessor'
  args:
    voxel_size: [0.4, 0.4, 4] # useful (not a place-holder); same values as m1's voxel_size
    max_points_per_voxel: 1 # place-holder
    max_voxel_train: 1 # place-holder
    max_voxel_test: 1 # place-holder
  # lidar range for each individual cav.
  cav_lidar_range: *cav_lidar

# anchor box related
# Defines the anchors &anchor_yaw, &anchor_num and &dir_args that the `model`
# and `loss` sections reference through aliases.
postprocess:
  core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported
  gt_range: *cav_lidar
  anchor_args:
    cav_lidar_range: *cav_lidar
    # NOTE(review): l / w / h are presumably the anchor box length / width / height -- confirm units with the postprocessor
    l: 3.9
    w: 1.6
    h: 1.56
    # two anchor yaw angles; `num` below matches the length of this list
    r: &anchor_yaw [0, 90]
    feature_stride: 2
    num: &anchor_num 2
  target_args:
    pos_threshold: 0.6
    neg_threshold: 0.45
    score_threshold: 0.2
  order: 'hwl' # hwl or lwh
  max_num: 150 # maximum number of objects in a single frame. Use this number to make sure different frames have the same dimension in the same batch
  nms_thresh: 0.15
  dir_args: &dir_args
    dir_offset: 0.7853
    num_bins: 2
    anchor_yaw: *anchor_yaw


# network structure:
# encoder_m1 -> backbone_m1 -> aligner_m1 --> fusion_backbone -> shrink_header -> heads
# encoder_m2 -> backbone_m2 -> aligner_m2 -|
# encoder_m3 -> backbone_m3 -> aligner_m3 -|
# encoder_m4 -> backbone_m4 -> aligner_m4 -|
#
# Each per-type core_method / sensor_type below is an alias of the value
# declared under heter.modality_setting.<type>.
model:
  core_method: heter_pyramid_collab
  args:
    lidar_range: *cav_lidar
    supervise_single: true

    m1: # detailed model setting for m1 agent type
      core_method: *core_method_m1
      sensor_type: *sensor_type_m1

      encoder_args:
        voxel_size: *voxel_size
        lidar_range: *cav_lidar
        pillar_vfe:
          use_norm: true
          with_distance: false
          use_absolute_xyz: true
          num_filters: [64]
        point_pillar_scatter:
          num_features: 64

      backbone_args:
        layer_nums: [3]
        layer_strides: [2]
        num_filters: [64]
  
      aligner_args: # see opencood/models/sub_modules/feature_alignnet.py
        core_method: identity

    m2: # detailed model setting for m2 agent type
      core_method: *core_method_m2
      sensor_type: *sensor_type_m2

      encoder_args:
        anchor_number: *anchor_num
        grid_conf: *grid_conf_m2
        data_aug_conf: *data_aug_conf_m2
        img_downsample: 8
        # NOTE(review): the &img_feature anchor is not referenced in the visible
        # part of this file; m4 repeats the literal 128 instead.
        img_features: &img_feature 128
        use_depth_gt: false # use depth GT in LSS projection
        depth_supervision: true # add depth supervision for LSS. set to 'false' if no depth data
        camera_encoder: EfficientNet

      camera_mask_args:
        cav_lidar_range: *cav_lidar
        grid_conf: *grid_conf_m2

      backbone_args:
        layer_nums: [3]
        layer_strides: [2]
        num_filters: [64]
        # same value as encoder_args.img_features (128) -- presumably must stay in sync; verify
        inplanes: 128

      aligner_args:
        core_method: identity

    m3: # detailed model setting for m3 agent type
      core_method: *core_method_m3
      sensor_type: *sensor_type_m3

      encoder_args:
        voxel_size: *voxel_size_m3
        lidar_range: *cav_lidar
        mean_vfe:
          num_point_features: 4
        spconv:
          num_features_in: 64
          num_features_out: 64
        map2bev:
          feature_num: 128

      backbone_args:
        layer_nums: [3]
        # stride 1 here, whereas m1 / m2 / m4 use stride 2
        layer_strides: [1]
        num_filters: [64]
        # same value as map2bev.feature_num (128) -- presumably must stay in sync; verify
        inplanes: 128
      
      aligner_args:
        core_method: identity

    m4: # detailed model setting for m4 agent type
      core_method: *core_method_m4
      sensor_type: *sensor_type_m4

      encoder_args:
        anchor_number: *anchor_num
        grid_conf: *grid_conf_m4
        data_aug_conf: *data_aug_conf_m4
        img_downsample: 8
        img_features: 128
        use_depth_gt: false # use depth GT in LSS projection
        depth_supervision: true # add depth supervision for LSS. set to 'false' if no depth data
        # m4 uses Resnet101, whereas m2 uses EfficientNet
        camera_encoder: Resnet101

      camera_mask_args:
        cav_lidar_range: *cav_lidar
        grid_conf: *grid_conf_m4

      backbone_args:
        layer_nums: [3]
        layer_strides: [2]
        num_filters: [64]
        inplanes: 128
        
      aligner_args:
        core_method: identity

    # shared backbone applied after the per-type aligners (see the diagram above)
    fusion_backbone: 
      resnext: true
      layer_nums: [3, 5, 8]
      layer_strides: [1, 2, 2]
      num_filters: [64, 128, 256]
      upsample_strides: [1, 2, 4]
      num_upsample_filter: [128, 128, 128]
      anchor_number: *anchor_num

    shrink_header: 
      # NOTE(review): the key is spelled `kernal_size`; presumably this is the
      # spelling the model code reads -- verify before renaming.
      kernal_size: [ 3 ]
      stride: [ 1 ]
      padding: [ 1 ]
      dim: [ 256 ]
      input_dim: 384 # 128 * 3; equals the sum of fusion_backbone.num_upsample_filter

    # same value as shrink_header.dim (256)
    in_head: 256
    anchor_number: *anchor_num
    dir_args: *dir_args


# Loss configuration: classification, regression, direction, depth and
# pyramid (foreground) terms, each with its own weight.
loss:
  core_method: point_pillar_pyramid_loss
  args:
    pos_cls_weight: 2.0
    cls:
      type: 'SigmoidFocalLoss'
      alpha: 0.25
      gamma: 2.0
      weight: 1.0
    reg:
      type: 'WeightedSmoothL1Loss'
      sigma: 3.0
      codewise: true
      weight: 2.0
    dir:
      type: 'WeightedSoftmaxClassificationLoss'
      weight: 0.2
      # alias of postprocess.dir_args (dir_offset, num_bins, anchor_yaw)
      args: *dir_args
    # NOTE(review): presumably the weight of the depth loss enabled by
    # `depth_supervision: true` in the camera encoders -- confirm
    depth:
      weight: 1.0
    pyramid: # foreground loss
      # three entries in each list; presumably one weight per downsample level -- confirm
      relative_downsample: [1, 2, 4]
      weight: [0.4, 0.2, 0.1]


# Optimizer selection and its keyword arguments.
optimizer:
  core_method: Adam
  lr: 0.002
  args:
    # Written with an explicit mantissa dot: YAML 1.1 resolvers (e.g. stock
    # PyYAML) only recognise exponent notation as a float when the mantissa
    # contains a '.', so a bare `1e-10` would be loaded as the string "1e-10".
    # `1.0e-10` / `1.0e-4` are the same numbers and resolve as floats under
    # both YAML 1.1 and YAML 1.2 loaders.
    eps: 1.0e-10
    weight_decay: 1.0e-4

# Learning-rate schedule.
lr_scheduler:
  core_method: multistep # step, multistep and Exponential are supported
  gamma: 0.1
  # NOTE(review): with multistep these are presumably the epochs at which the
  # learning rate is multiplied by gamma -- confirm against the scheduler setup
  step_size: [15, 30]

