# Training-data settings.
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm them against the dataset/loader code that reads this file.
data:
  train_bs: 1  # presumably the training batch size
  train_width: 512  # presumably the frame width in pixels
  train_height: 512  # presumably the frame height in pixels
  sample_rate: 4  # presumably the stride between sampled frames - TODO confirm
  n_sample_frames: 24  # presumably the number of frames per training clip

# Optimisation settings: precision, memory-saving switches, step budget,
# learning-rate schedule and Adam hyper-parameters.
# Booleans are written as lowercase `true`/`false`, the canonical YAML form.
solver:
  gradient_accumulation_steps: 1
  mixed_precision: 'fp16'
  enable_xformers_memory_efficient_attention: true
  gradient_checkpointing: true
  max_train_steps: 30000
  max_grad_norm: 1.0

  # Learning rate and schedule.
  # The mantissa dot is deliberate: YAML 1.1 resolvers (e.g. PyYAML) load a
  # bare `1e-5` as the string '1e-5' rather than a float.
  learning_rate: 1.0e-5
  scale_lr: false
  lr_warmup_steps: 1
  lr_scheduler: 'constant'

  # Optimizer (Adam) hyper-parameters.
  use_8bit_adam: true
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_weight_decay: 1.0e-2
  adam_epsilon: 1.0e-8

# Validation settings.
val:
  # NOTE(review): presumably the interval, in training steps, between
  # validation runs - confirm against the training script.
  validation_steps: 20


# Keyword arguments for the noise scheduler.
# NOTE(review): presumably forwarded unchanged to the scheduler constructor,
# so the key names must match its parameter names - confirm against the caller.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: 'linear'
  steps_offset: 1
  clip_sample: false

# Locations of the pretrained weights. All four are relative paths under
# ./pretrained_weights; what they are resolved against depends on the consumer.
# NOTE(review): presumably the process working directory - confirm.
base_model_path: './pretrained_weights/stable-diffusion-v1-5'
vae_model_path: './pretrained_weights/sd-vae-ft-mse'
image_encoder_path: './pretrained_weights/sd-image-variations-diffusers/image_encoder'
# NOTE(review): `mm` presumably stands for the AnimateDiff motion module.
mm_path: './pretrained_weights/animatediff/mm_sd_v15_v2.ckpt'

# Precision, conditioning-dropout and loss-weighting settings, followed by
# the stage-1 checkpoint reference.
# NOTE(review): the per-key notes are inferred from the key names; confirm
# against the training script.
weight_dtype: 'fp16'  # one of: fp16, fp32
uncond_ratio: 0.1  # presumably the probability of dropping the condition
noise_offset: 0.05
snr_gamma: 5.0
# Lowercase `true` is the canonical YAML boolean and resolves in every schema.
enable_zero_snr: true
# Presumably the stage-1 experiment directory and the step of the checkpoint
# to load from it - confirm.
stage1_ckpt_dir: './exp_output/stage1_rr_3'
stage1_ckpt_step: 120000

# Run bookkeeping: seed, checkpointing and output location.
# NOTE(review): the per-key notes are inferred from the key names; confirm
# against the training script.
seed: 12580
# Empty string here; presumably that means "start from scratch" - confirm.
resume_from_checkpoint: ''
checkpointing_steps: 2000  # presumably the interval, in steps, between saves
exp_name: 'stage2_rr_3'
output_dir: './exp_output'