# Dataset configuration: input locations, category split, label names and
# loader options.
DATA:
  # Input locations (relative paths).
  data_root: 'data/matterport_3d'
  data_root_2d: 'data/matterport_2d'
  caption_path: 'data/caption/caption_view_matterport_vit-gpt2-image-captioning_.json'
  entity_path: 'data/caption/small/caption_entity_matterport_vit-gpt2-image-captioning_small.json'

  # Category ids. base_category (10 ids) and novel_category (11 ids) are
  # disjoint and together make up all_category (0-20).
  category_split:
    novel_category: [9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20]
    ignore_category: [255]
    base_category: [0, 1, 2, 3, 4, 5, 6, 7, 8, 13]
    all_category: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

  # 21 names, the same count as all_category.
  # NOTE(review): presumably the list position is the category id - confirm.
  all_label:
    - 'wall'
    - 'floor'
    - 'cabinet'
    - 'bed'
    - 'chair'
    - 'sofa'
    - 'table'
    - 'door'
    - 'window'
    - 'bookshelf'
    - 'picture'
    - 'counter'
    - 'desk'
    - 'curtain'
    - 'refridgerator'  # (sic) runtime string; spelling deliberately left as-is
    - 'shower curtain'
    - 'toilet'
    - 'sink'
    - 'bathtub'
    - 'otherfurniture'
    - 'ceiling'

  # The 10 entries of all_label found at the base_category positions
  # (0-8 and 13).
  label:
    - 'wall'
    - 'floor'
    - 'cabinet'
    - 'bed'
    - 'chair'
    - 'sofa'
    - 'table'
    - 'door'
    - 'window'
    - 'curtain'

  # 10 ids, the same length as label.
  # NOTE(review): presumably the matching ids in the 2D label set - confirm.
  label_2d: [1, 2, 3, 4, 5, 6, 7, 8, 9, 16]

  test_ignore_label: [255]
  ignore_label: 21
  data_ratio: 0.5

  # classes matches the length of label; test_classes matches the length of
  # all_label.
  classes: 10
  test_classes: 21
  num_queries: 200
  scannet200: false
  scores_threshold: 0.0

  # NOTE(review): mean 0 / std 255 looks like a plain rescale of 8-bit pixel
  # values to [0, 1] - confirm against the loader.
  pixel_mean: [0.0, 0.0, 0.0]
  pixel_std: [255.0, 255.0, 255.0]

  # Loader options.
  aug: false
  voxel_size: 0.02
  input_color: true
  use_shm: false
  loop: 16
  val_keep: 10000000

# Model and optimisation configuration.
Model:
  # Architecture names and the pretrained checkpoint path.
  arch_3d: 'MinkUNet34C'
  arch_binary_head: 'MinkUNet18A'
  lseg_model_path: 'pretrained/weights/lseg/demo_e200.ckpt'

  start_contra: 0
  binary_2d_thresh: 0.5
  # Distinct from pseudo_label.scores_keep_thresh further down: same key
  # name, different nesting level, different value.
  scores_keep_thresh: 0

  # Optimiser settings.
  lr_3d: 0.0001
  lr_others: 0.0001
  weight_decay: 0.00001
  warmup_epochs: 2
  prompt_eng: true

  # NOTE(review): presumably per-term loss multipliers - confirm against the
  # training code. The nested pseudo_label entry here is a scalar and is
  # separate from the Model-level pseudo_label mapping.
  loss_weight:
    loss_3d: 4
    loss_3d_pure: 4
    loss_3d_contra: 1.2
    loss_explicit_contra: 1.5
    loss_explicit_contra_3d: 1.5
    loss_explicit_contra_2d_pre: 4
    loss_binary: 12
    pseudo_label: 1
    entity_gt_loss: 1.5

  # Pseudo-label options.
  pseudo_label:
    enable: true
    temperature: 0.07
    scores_keep_thresh: 0.05
    infer_use_caption_boost: true
    infer_caption_boost_factor: 0.3
    infer_boost_only_novel_pred: false
    use_view_entities: true

  # Entity ground-truth options.
  entity_gt:
    enable: true
    contrastive_temp: 0.07

  # cam.loss_weight is a scalar; it only shares its name with the
  # Model-level loss_weight mapping above.
  cam:
    enable: true
    alignment_dim: 512
    loss_weight: 1.5
    kl_temperature: 1.2

  # Feature switches.
  mask_contra_3d: true
  caption_contra: true
  caption_contra_2d_pre: true
  caption_contra_3d: true
  use_ape: false

  # NOTE(review): presumably (height, width) - confirm.
  mask_shape: [512, 640]

  # Schedule, seeding and logging cadence.
  power: 0.9
  momentum: 0.9
  manual_seed: 5557
  print_freq: 10
  save_freq: 1
  eval_freq: 2
  base_ratio: 0.65
  novel_ratio: 0.25
  clip_name: 'ViT-L-14'
  learning_rate_type: 'cosine'

# Runtime configuration: output/resume locations, distributed settings,
# device lists, batch sizes and epoch range.
Distributed:
  # No value is configured for these two keys. They are written as an
  # explicit null so the "unset" intent cannot be mistaken for an empty
  # string or an empty list.
  # NOTE(review): presumably supplied at launch time - confirm.
  save_path: null
  resume: null

  # Distributed settings.
  dist_url: 'tcp://127.0.0.1:6745'
  dist_backend: 'nccl'
  multiprocessing_distributed: false
  world_size: 1
  rank: 0

  # Training devices and loader sizing.
  train_gpu: [0]
  workers: 1
  batch_size: 4
  batch_size_val: 1

  # Inference devices and loader sizing.
  infer_gpu: [0]
  infer_workers: 0
  infer_batch_size_val: 1

  # Run-mode switches and epoch range.
  evaluate: true
  train_s: true
  epochs: 100
  start_epoch: 0
