# Run identity. `name` and `version` are reused further down in this file
# through `${name}` / `${version}` interpolation (output directory, logger
# project and run name).
name: "mvgen-i2mv-experiments"
tags:
  - "lvis96v"
description: ""
version: "lvis4w-f16k16-randv-n8-pabsref"
# Root folder for this experiment's artifacts; resolves to outputs/<name>.
output_dir: "outputs/${name}"

# NOTE(review): presumably the global RNG seed applied by the training entry
# point -- confirm where it is consumed.
seed: 42
# NOTE(review): presumably a checkpoint path to resume from; null looks like
# "start fresh" -- confirm against the training script.
resume: null

# Shared values referenced elsewhere in this file via interpolation.
extras:
  # Every dataset below reads this through `${extras.k_near_views}`, so the
  # train/val/test splits always agree on it.
  k_near_views: 16

# Data module configuration. Each mapping carrying a `_target_` key names the
# class to build (Hydra-style instantiation).
#
# Layout:
#   - train: one LVIS render dataset with `sample_views_mode: random`.
#   - val:   two datasets over the same LVIS root and caption_val.txt that
#            differ only in `sample_views_mode` (`lay4` vs `fixed_random`).
#   - test:  two datasets over the GSO 96-view renders, same two modes.
# All datasets share num_views, bg_color, img_wh and k_near_views, and every
# split uses a batch size of 1.
# NOTE(review): the meaning of the `lay4` / `fixed_random` / `random` modes is
# defined in src.data.multiview and not visible here -- confirm there.
data:
  _target_: src.data.multiview.MultiViewDataModule
  train_dataset:
    _target_: src.data.multiview.MultiViewDataset
    root_dir: /mnt/pfs/data/render_lvis_hzh
    num_views: 16
    bg_color: white
    # NOTE(review): presumably [width, height] in pixels -- confirm.
    img_wh: [256, 256]
    k_near_views: ${extras.k_near_views}
    sample_views_mode: random
    caption_path: /mnt/pfs/data/render_lvis_hzh/caption_train.txt
  train_batch_size: 1
  val_dataset:
    - _target_: src.data.multiview.MultiViewDataset
      root_dir: /mnt/pfs/data/render_lvis_hzh
      num_views: 16
      bg_color: white
      img_wh: [256, 256]
      k_near_views: ${extras.k_near_views}
      sample_views_mode: lay4
      caption_path: /mnt/pfs/data/render_lvis_hzh/caption_val.txt
    - _target_: src.data.multiview.MultiViewDataset
      root_dir: /mnt/pfs/data/render_lvis_hzh
      num_views: 16
      bg_color: white
      img_wh: [256, 256]
      k_near_views: ${extras.k_near_views}
      sample_views_mode: fixed_random
      caption_path: /mnt/pfs/data/render_lvis_hzh/caption_val.txt
  val_batch_size: 1
  test_dataset:
    - _target_: src.data.multiview.MultiViewDataset
      root_dir: /mnt/pfs/data/GSO/render_96v
      num_views: 16
      bg_color: white
      img_wh: [256, 256]
      k_near_views: ${extras.k_near_views}
      sample_views_mode: lay4
      caption_path: /mnt/pfs/data/GSO/gso_captions.txt
    - _target_: src.data.multiview.MultiViewDataset
      root_dir: /mnt/pfs/data/GSO/render_96v
      num_views: 16
      bg_color: white
      img_wh: [256, 256]
      k_near_views: ${extras.k_near_views}
      sample_views_mode: fixed_random
      caption_path: /mnt/pfs/data/GSO/gso_captions.txt
  test_batch_size: 1
  # NOTE(review): 64 workers assumes a host with many CPU cores -- lower this
  # on smaller machines.
  num_workers: 64
  # Canonical lowercase boolean, matching the other booleans in this file.
  pin_memory: true

# Training system (the object built from `_target_`) and its multi-view model.
system:
  _target_: src.systems.i2mv_system.I2MVSystem
  base_model_id: "bennyguo/zero123-xl-diffusers"
  # NOTE(review): presumably selects which pretrained weight files are loaded
  # for `base_model_id` -- confirm in I2MVSystem.
  variant: "fp16_ema"
  mv_model:
    _target_: src.models.mv_model.MVModel
    # `_partial_: true` makes Hydra return a functools.partial instead of a
    # built object, so the remaining constructor arguments are supplied later
    # by whoever calls it.
    _partial_: true
    cp_block_model:
      _target_: src.models.extras.feature_aggregator.FeatureAggregator
      # Also a partial: completed by the code that receives it.
      _partial_: true
      depth: 2
      use_resnet: true
      use_fhw_attn: false
      pos_encs: ["abs", "ref"]
    insert_stages: ["mid", "up"]
    insert_up_layers: [0, 1, 2, 3]
    use_residual: false
    render_options:
      n_samples: 8
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. plain
  # PyYAML) resolve a bare `1e-5` as the string "1e-5", not a float.
  lr: 1.0e-5
  # NOTE(review): presumably the conditioning-drop probability used for
  # classifier-free guidance training -- confirm in I2MVSystem.
  cfg: 0.1
  report_to: wandb

# Lightning Trainer arguments, grouped by concern.
trainer:
  _target_: lightning.pytorch.trainer.Trainer

  # Hardware / distribution.
  accelerator: gpu
  strategy: ddp_find_unused_parameters_true
  # Mixed precision for extra speed-up.
  precision: "16-mixed"

  # Schedule.
  # NOTE(review): 20001 rather than 20000 is presumably deliberate so that the
  # step-20000 checkpoint is still produced -- confirm.
  max_steps: 20001
  # A float here is a fraction of a training epoch: validate twice per epoch.
  val_check_interval: 0.5
  # check_val_every_n_epoch: 1
  num_sanity_val_steps: 1

  # Optimisation.
  # With `data.train_batch_size: 1` this gives 8 samples per optimizer step
  # on each device.
  accumulate_grad_batches: 8
  gradient_clip_val: 1.0

  # Output and console reporting.
  default_root_dir: ${output_dir}
  log_every_n_steps: 10
  enable_progress_bar: true

# Trainer callbacks, keyed by an arbitrary label.
callbacks:
  model_checkpoint:
    _target_: lightning.pytorch.callbacks.ModelCheckpoint
    # -1 keeps every checkpoint that is written instead of pruning to the
    # best k.
    save_top_k: -1
    # Write a checkpoint every 5000 training steps.
    every_n_train_steps: 5000

# Experiment loggers, keyed by an arbitrary label. Only Weights & Biases is
# active; the TensorBoard entry is kept commented out as a ready alternative.
logger:
  # tensorboard:
  #   _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
  #   save_dir: "${output_dir}"
  #   name: ""
  #   version: "${version}"
  #   sub_dir: "tb_logs"
  wandb:
    _target_: lightning.pytorch.loggers.wandb.WandbLogger
    # W&B project is the experiment name; the run name is the version tag.
    project: "${name}"
    name: "${version}"
    # NOTE(review): this is the bare "outputs" folder, not `${output_dir}`
    # (outputs/<name>) as in the disabled TensorBoard block -- confirm that
    # the difference is intended.
    save_dir: "outputs"
