# Experiment identity. `tag` is assembled purely by interpolation from:
#   - system.guidance.guidance_type
#   - data.random_camera.height
#   - data.image_path, passed through the `basename` and `rmspace` resolvers
#     (not defined in this file; presumably registered by the consuming
#     application -- verify there).
name: 'zero123-simple'
tag: '${system.guidance.guidance_type}_${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}'
exp_root_dir: 'outputs'
seed: 0

# Data module selection and its settings.
data_type: 'single-image-datamodule'
data:
  # `???` marks a mandatory value with no default: it must be supplied from
  # outside this file (the `tag` and `system.guidance.cond_image_path`
  # entries both interpolate it).
  image_path: ???
  height: 256
  width: 256
  # Defaults referenced elsewhere in this file through `${data.default_*}`.
  default_elevation_deg: 0.0
  default_azimuth_deg: 0.0
  default_camera_distance: 2.5
  default_fovy_deg: 40.0
  random_camera:
    batch_size: 1
    height: 256
    width: 256
    # Two-element ranges; presumably [min, max] sampling bounds, so identical
    # endpoints would pin the value -- confirm against the data module.
    elevation_range: [-45, 45]
    azimuth_range: [-180, 180]
    camera_distance_range: [2.5, 2.5]
    fovy_range: [40.0, 40.0]
    # All perturbation amounts are zero in this configuration.
    camera_perturb: 0.0
    center_perturb: 0.0
    up_perturb: 0.0
    eval_height: 512
    eval_width: 512
    # Evaluation camera settings mirror the defaults declared above.
    eval_elevation_deg: '${data.default_elevation_deg}'
    eval_camera_distance: '${data.default_camera_distance}'
    eval_fovy_deg: '${data.default_fovy_deg}'
    n_val_views: 4
    n_test_views: 120

# System selection; everything nested under `system:` configures it.
system_type: 'zero123-simple-system'
system:
  # Scene geometry representation.
  geometry_type: 'implicit-volume'
  geometry:
    # Also read by system.renderer.radius through interpolation.
    radius: 1.0
    normal_type: 'analytic'

    # use Magic3D density initialization
    density_bias: 'blob_magic3d'
    density_activation: 'softplus'
    density_blob_scale: 10.0
    density_blob_std: 0.5

    # Positional encoding settings.
    pos_encoding_config:
      otype: 'ProgressiveBandHashGrid'
      n_levels: 16
      n_features_per_level: 2
      log2_hashmap_size: 19
      base_resolution: 16
      per_level_scale: 1.447269237440378  # max resolution 4096
      start_level: 8  # resolution ~200
      start_step: 2000
      update_steps: 300
    # MLP settings.
    mlp_network_config:
      otype: 'VanillaMLP'
      activation: 'ReLU'
      output_activation: 'none'
      n_neurons: 64
      n_hidden_layers: 2

  # Material selection and its single option.
  material_type: 'no-material'
  material:
    requires_normal: true

  # Background selection; no options are overridden here.
  background_type: 'solid-color-background'

  # Renderer selection and options.
  renderer_type: 'nerf-volume-renderer'
  renderer:
    # Kept in sync with the geometry radius via interpolation.
    radius: '${system.geometry.radius}'
    num_samples_per_ray: 256
    return_comp_normal: true

  # Guidance module selection and options.
  guidance_type: 'zero123-unified-guidance'
  guidance:
    # Guidance mode; this value is also interpolated into the top-level `tag`.
    guidance_type: 'sds'
    # guidance_type: "vsd" # switch to this line to use vsd
    pretrained_model_name_or_path: 'bennyguo/zero123-diffusers'
    guidance_scale: 5.0
    # empirical timestep annealing
    # NOTE(review): the four-element list looks like a step-dependent
    # schedule; its element order is defined by the consumer -- confirm there.
    min_step_percent: 0.1
    max_step_percent: [5000, 0.9, 0.5, 5001]

    # Conditioning inputs are taken from the `data` section via interpolation.
    cond_image_path: '${data.image_path}'
    cond_elevation_deg: '${data.default_elevation_deg}'
    cond_azimuth_deg: '${data.default_azimuth_deg}'
    cond_camera_distance: '${data.default_camera_distance}'

    # debug use, may slow down training
    return_rgb_1step_orig: true

    # for vsd
    vsd_phi_model_name_or_path: null
    vsd_use_camera_condition: false
    use_img_loss: false

  # Optional experiment loggers; wandb is disabled in this configuration.
  loggers:
    wandb:
      enable: false
      project: "threestudio"
      # Must be a real YAML null. A plain `None` scalar is parsed as the
      # four-character string "None", not as an unset value, so it would be
      # handed to the logger as a literal name once `enable` is switched on.
      name: null

  # Loss-term weights; entries set to 0.0 are zero-weighted in this config.
  loss:
    lambda_sd: 1.0
    lambda_train_phi: 1.0  # for vsd
    lambda_orient: 0.0
    lambda_normal_smoothness_2d: 1000.0
    lambda_sparsity: 0.0
    lambda_opaque: 0.0

  # Optimizer selection, its base arguments, and per-entry learning rates.
  optimizer:
    name: 'Adam'
    args:
      lr: 0.01
      betas: [0.9, 0.99]
      eps: 1.0e-8
    # Keys below presumably name sub-modules of the system that receive
    # their own learning rate -- confirm against the optimizer builder.
    params:
      geometry.encoding:
        lr: 0.01
      geometry.density_network:
        lr: 0.001
      geometry.feature_network:
        lr: 0.001
      # for vsd
      guidance:
        lr: 0.0001

# Training-loop settings (presumably forwarded to the trainer object by the
# launcher -- confirm there).
trainer:
  # Run length; checkpoint.every_n_train_steps interpolates this value.
  max_steps: 10000
  precision: 32
  # Validation cadence.
  num_sanity_val_steps: 1
  val_check_interval: 100
  # Logging / console output.
  log_every_n_steps: 1
  enable_progress_bar: true

# Checkpointing settings.
checkpoint:
  save_last: true  # save at each validation time
  save_top_k: -1
  # Interval is tied to trainer.max_steps (10000 in this file).
  every_n_train_steps: '${trainer.max_steps}'
