# Dataset / scene input options.
# Booleans use the canonical lowercase form so every YAML loader (1.1 and 1.2)
# resolves them identically.
scene:
  scene_path: "data/ScanNet"
  dataset_name: "scannet20"
  test_cameras: false
  colmap_images: "images"
  # NOTE(review): presumably every 8th image is held out for evaluation --
  # confirm against the data loader.
  colmap_eval_hold: 8
  downscale_ratio: 1
  white_background: false
  # model.device further down in this file is set to the same value.
  device: "cuda:0"

# Pipeline switches (all disabled in this config) and the seed value.
# Booleans use the canonical lowercase form so every YAML loader (1.1 and 1.2)
# resolves them identically.
pipeline:
  # NOTE(review): the two *_python flags presumably select Python code paths
  # over the default implementation -- confirm against the renderer.
  convert_shs_python: false
  compute_cov3d_python: false
  debug: false
  seed: 0

# Model loading options.
# Booleans use the canonical lowercase form so every YAML loader (1.1 and 1.2)
# resolves them identically.
model:
  sh_degree: 3
  model_dir: "gaussians_model"
  dynamic: false
  # NOTE(review): -1 presumably means "use the latest saved iteration" --
  # confirm against the model loader.
  load_iteration: -1
  # scene.device earlier in this file is set to the same value.
  device: "cuda:0"
  num_workers: 0

# Settings for the fusion stage.
fusion:
  # Alternative resolution, twice the active one along each axis:
  # img_dim: [1296, 968]
  img_dim: [648, 484]
  num_workers: 8
  # Choose from: openseg, lseg, samclip, vlpart.
  model_2d: "lseg"
  # Choose from: image, render, surface, none.
  depth: "render"
  depth_scale: 1000.0
  visibility_threshold: 0.05
  cut_boundary: 10
  # train: 80000, eval: 999999999 (large enough)
  n_split_points: 999999999
  # train: 5, eval: 1
  num_rand_file_per_scene: 1
  out_dir: "output_fusion"

# Caption head configuration.
# Key names are upper-case in this section; they are kept exactly as-is because
# the consumer looks them up by name.
# Booleans use the canonical lowercase form so every YAML loader (1.1 and 1.2)
# resolves them identically.
caption_head:
  NAME: CaptionHead
  POOLING_TYPE: avg
  FEAT_NORM: true
  LOGIT_SCALE:
    value: 100.0
    learnable: true

  CUDA_ENABLED: true
  POOL_OBJ: score  # [feat, score]
  LOSS_FUNC: NLL_NoReduce
  # One weight per caption type; the same three names appear under
  # CAPTION_INFO.KEY in this file.
  LOSS_WEIGHT:
    SCENE: 0.1
    VIEW: 0.5
    ENTITY: 0.1

  DIV_N_CAP: false
  # Quoted so the value is unmistakably the string "none", not a null.
  DIV_MODE: "none"  # [none, square, log2]
  NOVEL_GRAD_ONLY: true

# Caption data configuration: one sub-section per caption type named in KEY.
# Booleans use the canonical lowercase form so every YAML loader (1.1 and 1.2)
# resolves them identically.
CAPTION_INFO:
  KEY: [SCENE, VIEW, ENTITY]

  # Scene captions are disabled in this config.
  SCENE:
    ENABLED: false
    GATHER_CAPTION: true

  VIEW:
    ENABLED: true
    IMAGE_CORR_PATH: data/caption/small/scannet_view_matching_idx.pkl
    # NOTE(review): with SELECT set to "ratio", RATIO is presumably the value
    # in effect and NUM is unused -- confirm against the caption loader.
    SELECT: ratio
    NUM: 1
    RATIO: 0.5
    GATHER_CAPTION: true

  ENTITY:
    ENABLED: true
    IMAGE_CORR_PATH: data/caption/small/caption_idx_small.pickle
    SELECT: ratio
    NUM: 1
    RATIO: 1.0
    GATHER_CAPTION: true

  # Sibling of SCENE / VIEW / ENTITY (applies to CAPTION_INFO as a whole),
  # not a child of ENTITY.
  CAPTION_CORR_PATH_IN_ONE_FILE: true