# Training configuration.
train:
  vos:
    # Experiment name and save directory. Both are anchored here and reused
    # through aliases by the data, trainer and monitor sections further down.
    exp_name: &TRAIN_NAME "sat_res50_davis16"
    exp_save: &TRAIN_SAVE "snapshots"
    num_processes: 8
    # Data section: datapipeline, sampler (with datasets and filter),
    # transformer and target settings.
    data:
      exp_name: *TRAIN_NAME
      exp_save: *TRAIN_SAVE
      # NOTE(review): same value as trainer.DistributedSATTrainer.max_epoch;
      # presumably the two are expected to stay in sync -- confirm.
      num_epochs: 20
      # minibatch and nr_image_per_epoch are anchored here and reused through
      # aliases by the trainer and optimizer sections.
      minibatch: &MINIBATCH 128
      num_workers: 32
      nr_image_per_epoch: &NR_IMAGE_PER_EPOCH 160000
      pin_memory: true
      datapipeline:
        name: "RegularDatapipeline"
      sampler:
        name: "TrackPairSampler"
        TrackPairSampler:
          negative_pair_ratio: 0.0
          target_type: "mask"
        submodules:
          dataset:
            # Every name listed here has a same-named settings mapping below.
            names: ["COCODataset", "DavisDataset", "YoutubeVOSDataset"]
            YoutubeVOSDataset:
              ratio: 5.0
              max_diff: 10
              subsets: ["train"]
            COCODataset:
              ratio: 1.0
              with_mask: true
              subsets: ["train2017", "val2017"]
            DavisDataset:
              ratio: 2.0
              max_diff: 10
              subsets: ["train2016"]
          filter:
            name: "TrackPairFilter"
            TrackPairFilter:
              max_area_rate: 0.75
              min_area_rate: 0.02
              max_ratio: 10
              target_type: "mask"
      transformer:
        names: ["ImageAug"]
        ImageAug:
          color_jitter_brightness: 0.05
          color_jitter_contrast: 0.05
          color_jitter_saturation: 0.05
          lighting_std: 0.05

      target:
        name: "SATMaskTarget"
        SATMaskTarget:
          max_scale: 0.1
          max_shift: 0.1
          max_scale_temp: 0.0
          max_shift_temp: 0.0


    # Tracker network: backbone, task head and task model.
    # NOTE(review): test.vos.tracker_model repeats the same head and model
    # values; presumably the two sections must be kept in sync -- confirm.
    tracker_model:
      backbone:
        name: "AlexNet"
      task_head:
        name: "DenseboxHead"
        DenseboxHead:
          total_stride: 8
          score_size: 17
          x_size: 303
          num_conv3x3: 3
          head_conv_bn: [false, false, true]
      task_model:
        name: "SiamTrack"
        SiamTrack:
          pretrain_model_path: "models/siamfcpp/siamfcpp-alexnet-vot-md5_18fd31a2f94b0296c08fff9b0f9ad240.pkl"
          corr_fea_output: true

    # Segmenter network: base encoder model, joint encoder, gml extractor,
    # decoder head, losses and task model.
    segmenter:
      # encoder_basemodel and gml_extractor both name "ResNet50_M" and point
      # at the same pretrained weight file.
      encoder_basemodel:
        name: "ResNet50_M"
        ResNet50_M:
          pretrain_model_path: "models/resnet/res-50-imagenet-fff49.pkl"
      encoder:
        name: "JointEncoder"
        JointEncoder:
          pretrain_model_path: ""
      gml_extractor:
        name: "ResNet50_M"
        ResNet50_M:
          pretrain_model_path: "models/resnet/res-50-imagenet-fff49.pkl"
      task_head:
        name: "DecoderHead"
      losses:
        names: ["MultiBCELoss"]
      task_model:
        name: "SatVOS"
      use_sync_bn: true
    # Trainer selection plus its monitors.
    trainer:
      name: "DistributedSATTrainer"
      DistributedSATTrainer:
        # All aliases below resolve to anchors defined earlier under train.vos.
        exp_name: *TRAIN_NAME
        exp_save: *TRAIN_SAVE
        max_epoch: 20
        minibatch: *MINIBATCH
        nr_image_per_epoch: *NR_IMAGE_PER_EPOCH
        snapshot: ""
      monitors:
        names: ["SegMetric", "TextInfo"]
        # TextInfo takes no settings: explicit empty mapping.
        TextInfo: {}
        SegMetric:
          interval: 100
        # NOTE(review): TensorboardLogger has settings here but is not listed
        # in `names` above -- confirm whether that is intentional.
        TensorboardLogger:
          exp_name: *TRAIN_NAME
          exp_save: *TRAIN_SAVE

    # Optimizer settings and learning-rate schedule.
    optim:
      optimizer:
        name: "SGD"
        SGD:
          lr: 0.004
          momentum: 0.9
          weight_decay: 0.00005
          minibatch: *MINIBATCH
          nr_image_per_epoch: *NR_IMAGE_PER_EPOCH
          # Each lr_policy entry is a folded block scalar holding one JSON
          # object (presumably decoded as JSON by the consumer -- confirm).
          # Keep these lines free of trailing spaces: inside a block scalar
          # they become part of the string value.
          # NOTE(review): the two phases' max_epoch values sum to 22 while
          # the trainer's max_epoch is 20 -- confirm this is intended.
          lr_policy:
          - >
            {
            "name": "LinearLR",
            "start_lr": 0.00001,
            "end_lr": 0.08,
            "max_epoch": 2
            }
          - >
            {
            "name": "CosineLR",
            "start_lr": 0.08,
            "end_lr": 0.000001,
            "max_epoch": 20
            }
# Test / evaluation configuration.
test:
  vos:
    # Experiment name and save directory. Both are anchored here and reused
    # through aliases by the tester section at the end of this mapping.
    exp_name: &TEST_NAME "sat_res50_davis16"
    exp_save: &TEST_SAVE "logs"
    # Tracker network used at test time: backbone, task head and task model.
    # NOTE(review): the head and model values repeat train.vos.tracker_model;
    # presumably the two sections must be kept in sync -- confirm.
    tracker_model:
      backbone:
        name: "AlexNet"
      losses:
        names: []
      task_head:
        name: "DenseboxHead"
        DenseboxHead:
          total_stride: 8
          score_size: 17
          x_size: 303
          num_conv3x3: 3
          head_conv_bn: [false, false, true]
      task_model:
        name: "SiamTrack"
        SiamTrack:
          corr_fea_output: true
          pretrain_model_path: "models/siamfcpp/siamfcpp-alexnet-vot-md5_18fd31a2f94b0296c08fff9b0f9ad240.pkl"
    # Tracker pipeline selection and its settings.
    tracker_pipeline:
      name: "SiamFCppTracker"
      SiamFCppTracker:
        test_lr: 0.512
        window_influence: 0.254
        penalty_k: 0.064
        # num_conv3x3 and corr_fea_output carry the same values as the
        # DenseboxHead / SiamTrack entries under test.vos.tracker_model.
        num_conv3x3: 3
        corr_fea_output: true
    # Segmenter network used at test time.
    segmenter:
      # The encoder and extractor pretrain paths are left empty in this
      # section; only SatVOS below specifies a weight file.
      encoder_basemodel:
        name: "ResNet50_M"
        ResNet50_M:
          pretrain_model_path: ""
      encoder:
        name: "JointEncoder"
        JointEncoder:
          pretrain_model_path: ""
      gml_extractor:
        name: "ResNet50_M"
        ResNet50_M:
          pretrain_model_path: ""
      task_head:
        name: "DecoderHead"
        DecoderHead:
          input_channel_list: [512, 256, 128, 64]
      losses:
        names: []
      task_model:
        name: "SatVOS"
        SatVOS:
          # Path is built from the training exp_save ("snapshots") and
          # exp_name ("sat_res50_davis16").
          # NOTE(review): epoch-19 is presumably the last snapshot of the
          # 20-epoch training run -- confirm the snapshot naming.
          pretrain_model_path: "snapshots/sat_res50_davis16/epoch-19.pkl"
    # Pipeline selection and its thresholds.
    pipeline:
      name: "StateAwareTracker"
      StateAwareTracker:
        # Anchored; the tester section reuses this value through an alias.
        save_patch: &save_patch false
        mask_pred_thresh: 0.4
        mask_filter_thresh: 0.4
        state_score_thresh: 0.7
        seg_ema_u: 0.65
        seg_ema_s: 0.55
        track_failed_score_th: 0.1
        update_global_fea_th: 0.15
    # Tester selection and its settings.
    tester:
      names: ["DAVISTester"]
      DAVISTester:
        exp_name: *TEST_NAME
        exp_save: *TEST_SAVE
        device_num: 4
        dataset_names: ["DAVIS2016"]
        save_video: false
        # Alias of pipeline.StateAwareTracker.save_patch defined above.
        save_patch: *save_patch
