# Network definition. `class_name` is a dotted name; presumably the loader
# resolves it to the model class -- confirm against the consuming code.
model:
  class_name: ilrm.IterativeLRM

  viewpoint_factor: 2

  # Image-stream tokenizer. If image_size and patch_size are both in pixels,
  # this gives 256 / 8 = 32 patches per side -- TODO confirm units.
  image_tokenizer:
    image_size: 256
    patch_size: 8
    in_channels: 12

  # Viewpoint-stream tokenizer. Same image_size / patch_size as
  # image_tokenizer, but 9 input channels instead of 12.
  viewpoint_tokenizer:
    image_size: 256
    patch_size: 8
    in_channels: 9

  # Transformer backbone. d / d_head = 768 / 64 = 12; presumably the number
  # of attention heads -- confirm against the model code.
  transformer:
    d: 768
    d_head: 64
    n_layer: 24

  # Gaussian parameterisation settings.
  # NOTE(review): scale_bias / scale_max look like log-space values
  # (exp(-6.9) ~= 0.001, exp(-1.2) ~= 0.30) -- confirm against the model code.
  gaussians:
    sh_degree: 0
    near_plane: 0.01
    far_plane: 1000000.0
    scale_bias: -6.9
    scale_max: -1.2
    opacity_bias: -2.0
    max_dist: 500.0

# Dataset location and frame-sampling settings.
data:
  data_path: "data/dl3dv_eval.txt"

  # Target size for resizing: height x width.
  resize_h: 540
  resize_w: 960

  # Frame selection strategies for input and target views.
  input_frame_select_type: "kmeans"
  target_frame_select_type: "uniform_every"
  num_input_frames: 32
  # NOTE(review): 0 target frames together with "uniform_every" presumably
  # means the count is derived from target_uniform_every -- confirm.
  num_target_frames: 0
  target_has_input: false

  # NOTE(review): min_frame_dist is the string "all" while max_frame_dist is
  # an integer -- verify the loader accepts both forms.
  min_frame_dist: "all"
  max_frame_dist: 128

  # Both probabilities are 0.0, i.e. set to zero for this config.
  shuffle_input_prob: 0.0
  reverse_input_prob: 0.0

  target_uniform_every: 8

# Inference / evaluation settings.
inference:
  finetune_iter: 10

  # Output toggles: only metric computation is enabled in this config.
  save_video: false
  save_images: false
  compute_metrics: true

  # Data loading. `dataset_name` is a dotted name; presumably resolved to the
  # dataset class by the loader -- confirm against the consuming code.
  batch_size_per_gpu: 1
  dataset_name: dataset.Dataset
  num_threads: 8
  num_workers: 4
  prefetch_factor: 4

  # Numeric precision switches.
  # NOTE(review): amp_dtype presumably only applies when use_amp is true --
  # confirm.
  use_tf32: true
  use_amp: true
  amp_dtype: bf16

  # Checkpoint to load and directory for results.
  ckpt_path: ilrm_undistort_dl3dv.pt
  out_dir: ./experiments/inference/iLRM_dl3dv_eval