# Training data settings.
data:
  # Batch size and frame resolution used for training samples.
  train_bs: 1
  train_width: 640
  train_height: 384

  # Dataset metadata files.
  # NOTE(review): exp_name in this file is 'stage2_mask', yet this entry points
  # at a stage_1 metadata file - confirm this is the intended dataset.
  meta_paths:
    - './data_config/stage_1.json'

  # Presumably the frame stride and the number of frames drawn per clip;
  # verify against the dataset loader.
  sample_rate: 4
  n_sample_frames: 16

# Optimization settings.
solver:
  gradient_accumulation_steps: 1
  mixed_precision: 'fp16'
  enable_xformers_memory_efficient_attention: true
  gradient_checkpointing: false
  max_train_steps: 10000
  max_grad_norm: 1.0

  # lr
  # Keep the explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) resolve a
  # bare `1e-5` as a string rather than a float.
  learning_rate: 1.0e-5
  scale_lr: false
  lr_warmup_steps: 1
  lr_scheduler: 'constant'

  # optimizer
  use_8bit_adam: true
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_weight_decay: 1.0e-2
  adam_epsilon: 1.0e-8

# Validation settings.
# NOTE(review): validation_steps is presumably the interval, in training steps,
# between validation runs - confirm against the training script.
val:
  validation_steps: 500


# Keyword arguments for the noise scheduler.
# NOTE(review): presumably forwarded unchanged to the scheduler constructor, so
# key names must match its parameters - confirm against the training script.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: 'linear'
  steps_offset: 1
  clip_sample: false

# Locations of the pretrained weights. All paths are relative
# (presumably resolved against the working directory of the training
# process - confirm before launching from another directory).
base_model_path: './pretrained_weights/stable-diffusion-v1-5'
vae_model_path: './pretrained_weights/sd-vae-ft-mse'
image_encoder_path: './pretrained_weights/sd-image-variations-diffusers/image_encoder'
mm_path: './pretrained_weights/mm_sd_v15_v2.ckpt'


# Numeric precision for model weights.
weight_dtype: 'fp16'  # [fp16, fp32]

# Training-objective knobs.
# NOTE(review): exact semantics are defined by the training script - verify there.
uncond_ratio: 0.1
noise_offset: 0.05
snr_gamma: 5.0
enable_zero_snr: true

# Stage-1 checkpoint reference (directory plus step number).
# NOTE(review): presumably the checkpoint this run is initialized from - confirm.
stage1_ckpt_dir: './exp_output/stage1_mask'
stage1_ckpt_step: 20700

# Run bookkeeping.
seed: 12580

# Empty string here; presumably means "do not resume" - confirm in the
# training script.
resume_from_checkpoint: ''
checkpointing_steps: 1000

# Experiment name and base output directory.
exp_name: 'stage2_mask'
output_dir: './exp_output'