# Dataset paths, category/label definitions and input settings.
DATA:
  data_root: data/matterport_3d_40
  data_root_2d: data/matterport_2d
  caption_path: data/caption/caption_view_matterport_vit-gpt2-image-captioning_.json
  entity_path: data/caption/small/caption_entity_matterport_vit-gpt2-image-captioning_small.json
  category_split:
    # Indices 20-39 are listed as novel and 0-19 as base;
    # all_category is the union of the two (0-39).
    novel_category: [20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
                     30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
    # Same value as ignore_label / test_ignore_label further down.
    ignore_category: [255]
    base_category: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                    10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
    all_category: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                   10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                   20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
                   30, 31, 32, 33, 34, 35, 36, 37, 38, 39]

  # 40 class names. The first 20 are identical to `label` below.
  # NOTE(review): presumably position i names category index i - confirm against the loader.
  all_label: ['wall', 'door', 'ceiling', 'floor', 'picture',
              'window', 'chair', 'pillow', 'lamp', 'cabinet',
              'curtain', 'table', 'plant', 'mirror', 'towel',
              'sink', 'shelves', 'sofa', 'bed', 'night stand',
              'toilet', 'column', 'banister', 'stairs', 'stool',
              'vase', 'television', 'pot', 'desk', 'box',
              'coffee table', 'counter', 'bench', 'garbage bin', 'fireplace',
              'clothes', 'bathtub', 'book', 'air vent', 'faucet']

  # 20 class names; same entries and order as the first 20 of all_label.
  label: ['wall', 'door', 'ceiling', 'floor', 'picture',
          'window', 'chair', 'pillow', 'lamp', 'cabinet',
          'curtain', 'table', 'plant', 'mirror', 'towel',
          'sink', 'shelves', 'sofa', 'bed', 'night stand']

  # 40 ids numbered from 1 (1..40), unlike all_category which starts at 0.
  label_2d: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
             11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
             21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
             31, 32, 33, 34, 35, 36, 37, 38, 39, 40]

  # test_ignore_label is a one-element list; ignore_label is a plain integer.
  test_ignore_label: [255]
  ignore_label: 255
  data_ratio: 0.5

  # 20 equals the length of base_category / label; 40 equals the length of
  # all_category / all_label.
  classes: 20
  test_classes: 40
  num_queries: 200
  scannet200: false
  scores_threshold: 0.0

  # NOTE(review): if applied as (x - mean) / std these values would map 0-255
  # pixel intensities to 0-1 - confirm against the 2D data pipeline.
  pixel_mean: [0.0, 0.0, 0.0]
  pixel_std: [255.0, 255.0, 255.0]

  aug: false
  # NOTE(review): unit of voxel_size is not stated in this file - confirm.
  voxel_size: 0.02
  input_color: true
  use_shm: false
  loop: 16
  val_keep: 10000000

# Network architecture, optimisation hyper-parameters and loss configuration.
Model:
  arch_3d: MinkUNet34C
  arch_binary_head: MinkUNet18A
  lseg_model_path: pretrained/weights/lseg/demo_e200.ckpt

  start_contra: 0
  binary_2d_thresh: 0.5
  # Integer 0 here; pseudo_label.scores_keep_thresh below holds a separate value (0.05).
  scores_keep_thresh: 0

  lr_3d: 0.0001
  lr_others: 0.0001
  weight_decay: 0.00001
  warmup_epochs: 2
  prompt_eng: true

  # NOTE(review): presumably one multiplier per loss term - confirm against the trainer.
  loss_weight:
    loss_3d: 4
    loss_3d_pure: 4
    loss_3d_contra: 1
    loss_explicit_contra: 1.5
    loss_explicit_contra_3d: 1.5
    loss_explicit_contra_2d_pre: 4
    loss_binary: 15
    # This key shares its name with the Model.pseudo_label section below.
    pseudo_label: 1
    entity_gt_loss: 1.5

  pseudo_label:
    enable: true
    temperature: 0.07
    scores_keep_thresh: 0.05
    infer_use_caption_boost: true
    infer_caption_boost_factor: 0.3
    infer_boost_only_novel_pred: false
    use_view_entities: true

  entity_gt:
    enable: true
    contrastive_temp: 0.07

  cam:
    enable: true
    alignment_dim: 512
    # Scalar weight local to `cam`; distinct from the Model.loss_weight mapping above.
    loss_weight: 1.5
    kl_temperature: 1.2

  mask_contra_3d: true
  caption_contra: true
  caption_contra_2d_pre: true
  caption_contra_3d: true
  use_ape: false

  # NOTE(review): axis order (height/width) is not stated in this file - confirm.
  mask_shape: [512, 640]

  power: 0.9
  momentum: 0.9
  manual_seed: 5557
  # NOTE(review): units of the *_freq values (iterations vs. epochs) are not
  # stated in this file - confirm.
  print_freq: 10
  save_freq: 1
  eval_freq: 2
  base_ratio: 0.5
  novel_ratio: 0.5
  clip_name: 'ViT-L-14'
  learning_rate_type: cosine

# Process/GPU layout, data-loader sizes and run control.
Distributed:
  # Both are null in this file.
  # NOTE(review): presumably supplied at launch time - confirm.
  save_path: null
  resume: null
  dist_url: 'tcp://127.0.0.1:6745'
  dist_backend: 'nccl'
  # As written: one process (world_size 1, rank 0) on GPU index 0.
  multiprocessing_distributed: false
  world_size: 1
  rank: 0
  train_gpu: [0]
  workers: 1
  batch_size: 4
  batch_size_val: 1
  # Inference has its own GPU list, worker count and batch size.
  infer_gpu: [0]
  infer_workers: 0
  infer_batch_size_val: 1
  evaluate: true
  train_s: true
  epochs: 100
  start_epoch: 0
