# Training-data settings.
# NOTE(review): key meanings below are inferred from the key names only;
# confirm against the training script that consumes this file.
data:
  # Presumably the per-step training batch size.
  train_bs: 1
  # Presumably the training frame size in pixels (width, then height).
  train_width: 640
  train_height: 384
  # Dataset metadata files; a single JSON file is listed here.
  meta_paths:
    - './all_data_config/train.json'
  # Presumably the frame-sampling stride and the number of frames per clip.
  sample_rate: 2
  n_sample_frames: 15

# Optimisation settings.
# NOTE(review): key meanings are inferred from the key names; confirm against
# the training script that consumes this file.
solver:
  gradient_accumulation_steps: 1
  mixed_precision: 'fp16'
  enable_xformers_memory_efficient_attention: true
  gradient_checkpointing: false
  max_train_steps: 30000
  max_grad_norm: 1.0

  # Learning rate and its schedule.
  # The value is written with an explicit mantissa dot on purpose: YAML 1.1
  # loaders such as PyYAML resolve a bare `1e-5` as a string, not a float.
  learning_rate: 1.0e-5
  scale_lr: false
  lr_warmup_steps: 1
  lr_scheduler: 'constant'

  # Optimizer (Adam hyperparameters).
  use_8bit_adam: true
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_weight_decay: 1.0e-2
  adam_epsilon: 1.0e-8

# Validation settings.
val:
  # Presumably the interval, in training steps, between validation runs —
  # confirm against the training script.
  validation_steps: 10000


# Keyword arguments for the noise scheduler.
# NOTE(review): presumably forwarded as-is to a diffusers-style scheduler
# constructor — confirm against the training script.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: 'linear'
  steps_offset: 1
  clip_sample: false

# Pretrained components, all located under ./pretrained_weights.
base_model_path: './pretrained_weights/stable-diffusion-v1-5'
vae_model_path: './pretrained_weights/sd-vae-ft-mse'
image_encoder_path: './pretrained_weights/sd-image-variations-diffusers/image_encoder'
mm_path: './pretrained_weights/mm_sd_v15_v2.ckpt'

# Per-module .pth checkpoint files.
# NOTE(review): presumably loaded as initial weights for each sub-module;
# confirm how they interact with the stage-1 checkpoint settings.
denoising_unet_path: './pretrained_weights/denoising_unet.pth'
reference_unet_path: './pretrained_weights/reference_unet.pth'
pose_guider_path: './pretrained_weights/pose_guider.pth'
motion_module_path: './pretrained_weights/motion_module.pth'

# Run-level settings.
# NOTE(review): key meanings are inferred from the key names; confirm against
# the training script that consumes this file.
use_audio_module: false

weight_dtype: 'fp16'  # [fp16, fp32]
uncond_ratio: 0.1
noise_offset: 0.05
snr_gamma: 5.0
enable_zero_snr: true

# Stage-1 checkpoint to start from (directory sits under output_dir below).
stage1_ckpt_dir: './exp_all_final/CovOG_stage1'
stage1_ckpt_step: 26560

seed: 12580
# Empty string here; presumably means "do not resume" — TODO confirm.
resume_from_checkpoint: ''
checkpointing_steps: 2000
exp_name: 'CovOG_stage2_wo_IAD'
output_dir: './exp_all_final'