# Extra keyword arguments for the UNet. The section name suggests these are
# forwarded to the model constructor — confirm against the loading code.
unet_additional_kwargs:
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention:    false
  # Motion-module switches. Their exact meaning is defined by the consumer of
  # this file; the key names are the only documentation visible here.
  use_motion_module:              true
  motion_module_resolutions:      [1, 2, 4, 8]
  motion_module_mid_block:        false
  motion_module_decoder_only:     false
  motion_module_type:             "Vanilla"

  # Nested options for the motion module (presumably passed through as
  # keyword arguments to the type named above — TODO confirm).
  motion_module_kwargs:
    num_attention_heads:                8
    num_transformer_block:              1
    # Two entries, both "Temporal_Self"; duplicates are intentional list items.
    attention_block_types:              ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding:         true
    # NOTE(review): looks like an upper bound on encoded positions — confirm
    # the unit (frames?) against the consumer.
    temporal_position_encoding_max_len: 24
    temporal_attention_dim_div:         1

# Keyword arguments for the noise scheduler. The key names match the
# parameters of diffusers-style schedulers (e.g. DDIMScheduler) — presumably
# that is the consumer; verify against the code that reads this file.
noise_scheduler_kwargs:
  # Endpoints and shape of the beta schedule.
  beta_start:    0.00085
  beta_end:      0.012
  beta_schedule: "linear"
  steps_offset:  1
  # Canonical lowercase boolean, consistent with the rest of this file.
  clip_sample:   false