# Experiment identity: run name, tag, output root directory and random seed.
# The tag is computed from `system.prompt_processor.prompt` by the custom
# `rmspace` resolver (presumably replacing spaces with `_` — confirm against
# the resolver's registration).
name: 'imagecondition'
tag: '${rmspace:${system.prompt_processor.prompt},_}'
exp_root_dir: 'outputs'
seed: 0

# Data module selection and its options: the input image path, two feature
# flags, and a `random_camera` section with batch size and evaluation-view
# parameters.
data_type: 'single-image-datamodule'
data:
  image_path: './load/images/hamburger_rgba.png'
  # Both flags are computed from loss weights under `system.loss` through
  # custom resolvers, so they follow the loss configuration instead of being
  # set by hand here.
  requires_depth: '${cmaxgt0orcmaxgt0:${system.loss.lambda_depth},${system.loss.lambda_depth_rel}}'
  requires_normal: '${cmaxgt0:${system.loss.lambda_normal}}'
  random_camera:
    batch_size: 4
    # Evaluation view settings.
    eval_height: 256
    eval_width: 256
    eval_elevation_deg: 0.0
    eval_camera_distance: 1.2
    eval_fovy_deg: 60.0
    # View counts for the validation and test splits.
    n_val_views: 30
    n_test_views: 120

# System selection. Inside `system`, each `<part>_type` key is followed by a
# `<part>` mapping of options (presumably the options of the selected
# implementation — confirm against the config loader).
system_type: 'image-condition-dreamfusion-system'
system:
  geometry_type: 'implicit-volume'
  geometry:
    # Isosurface extraction settings.
    isosurface_method: 'mc-cpu'
    isosurface_resolution: 128
    isosurface_threshold: 5.0
    # Normal computation settings.
    normal_type: 'finite_difference'
    finite_difference_normal_eps: 0.004
    # Network definition: 2 hidden layers of 64 neurons, ReLU activations,
    # and the output activation given as the string "none".
    mlp_network_config:
      otype: 'VanillaMLP'
      activation: 'ReLU'
      output_activation: 'none'
      n_neurons: 64
      n_hidden_layers: 2

  # Material selection and options. The ambient light color is set to all
  # ones and the diffuse light color to all zeros.
  material_type: 'diffuse-with-point-light-material'
  material:
    diffuse_prob: 1.0
    textureless_prob: 0.2
    ambient_light_color: [1.0, 1.0, 1.0]
    diffuse_light_color: [0.0, 0.0, 0.0]
    # Mirrors `system.freq.ref_only_steps`, so the two stay in step.
    ambient_only_steps: '${system.freq.ref_only_steps}'

  # Background selection and options: a direction encoding with 6
  # frequencies, and a network with a single hidden layer of 32 neurons.
  background_type: 'neural-environment-map-background'
  background:
    dir_encoding_config:
      otype: 'ProgressiveBandFrequency'
      n_frequencies: 6
    mlp_network_config:
      otype: 'VanillaMLP'
      n_neurons: 32
      n_hidden_layers: 1
      activation: 'ReLU'

  # Renderer selection and its single option, the number of samples per ray.
  renderer_type: "nerf-volume-renderer"
  renderer:
    num_samples_per_ray: 512

  # Prompt processor and guidance selection. Both sections name the same
  # pretrained model identifier (presumably they are meant to match —
  # confirm before changing only one). The prompt text is also used to build
  # the top-level `tag`.
  prompt_processor_type: 'stable-diffusion-prompt-processor'
  prompt_processor:
    pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
    prompt: 'a DSLR photo of a delicious hamburger'

  guidance_type: 'stable-diffusion-guidance'
  guidance:
    pretrained_model_name_or_path: 'runwayml/stable-diffusion-v1-5'
    guidance_scale: 100.0
    # Values listed by the original author: sds, uniform, fantasia3d.
    weighting_strategy: 'sds'

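  # Alternative setup (disabled): DeepFloyd IF prompt processor and guidance.
  # These lines reuse the key names of the active `prompt_processor*` and
  # `guidance*` entries of `system`, so comment those out before enabling the
  # lines below; otherwise the mapping would contain duplicate keys.
  # prompt_processor_type: "deep-floyd-prompt-processor"
  # prompt_processor:
  #   pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0"
  #   prompt: "a DSLR photo of a delicious hamburger"

  # guidance_type: "deep-floyd-guidance"
  # guidance:
  #   pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0"
  #   guidance_scale: 20.
  #   weighting_strategy: sds # sds, uniform, fantasia3d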

  # Step/frequency settings. `ref_only_steps` is also read by
  # `system.material.ambient_only_steps` through interpolation.
  # NOTE(review): the meaning of `n_ref` and `guidance_eval` is defined by the
  # consuming system class, which is not visible in this file.
  freq:
    n_ref: 2
    ref_only_steps: 100
    guidance_eval: 0

  # Logger settings. The wandb logger is switched off (`enable: false`).
  loggers:
    wandb:
      enable: false
      project: 'threestudio'
      # Explicit null. The bare word `None` is not a YAML null; it loads as
      # the four-character string "None".
      # NOTE(review): confirm the logger setup treats null as "no run name".
      name: null

  # Loss weights. Most entries are plain scalars; `lambda_depth` and
  # `lambda_depth_rel` are four-element lists (presumably a step-dependent
  # schedule — TODO confirm the element order with the consuming code).
  # `lambda_depth` / `lambda_depth_rel` drive `data.requires_depth` and
  # `lambda_normal` drives `data.requires_normal` through interpolation.
  loss:
    lambda_sds: 0.1
    lambda_rgb: 10.0
    lambda_mask: 1.0
    lambda_depth: [0.0, 0.0, 1.0, 10000]
    lambda_depth_rel: [0.0, 0.0, 1.0, 10000]
    # Alternative value noted by the original author: [0, 0, 0.05, 100]
    lambda_normal: 0.0
    lambda_normal_smooth: 0.0
    lambda_3d_normal_smooth: 0.0
    lambda_orient: 0.0
    lambda_sparsity: 0.0
    lambda_opaque: 0.0

  # Optimizer name, shared arguments, and per-entry learning rates.
  optimizer:
    name: 'Adan'
    args:
      eps: 1.0e-8
      weight_decay: 2.0e-5
      max_grad_norm: 5.0
      foreach: false
    # Each key carries its own `lr`. The dotted keys presumably name
    # sub-modules of the system — confirm against the optimizer setup code.
    # Entry order is kept as in the original.
    params:
      geometry.encoding:
        lr: 0.05
      geometry.density_network:
        lr: 0.005
      geometry.feature_network:
        lr: 0.005
      background.network:
        lr: 0.005

# Trainer options. `max_steps` is also read by
# `checkpoint.every_n_train_steps` through interpolation.
trainer:
  max_steps: 10000
  log_every_n_steps: 1
  num_sanity_val_steps: 0
  val_check_interval: 100
  limit_val_batches: 6
  enable_progress_bar: true
  precision: '16-mixed'

# Checkpoint options.
checkpoint:
  # Original note: "save at each validation time".
  # NOTE(review): the step interval below equals `trainer.max_steps`, which
  # does not obviously match that note — confirm against the checkpoint
  # callback's behavior.
  save_last: true
  save_top_k: -1
  every_n_train_steps: '${trainer.max_steps}'
