# ID of the video this config targets (the same ID is embedded in the
# model.cogvideo_lora_weight file name). Quoted like every other string in
# this file so that an ID that happens to look like a number or boolean is
# never re-typed by the YAML loader.
video_id: "1dCd6hCRoaQ"
# Directory that receives this run's outputs.
output_dir: "outputs/demos"
# Alternative output location (disabled):
# output_dir: "outputs/demos_new"

# Model checkpoints and architecture settings.
# NOTE(review): the per-key notes below are inferred from key names and paths;
# confirm them against the code that consumes this config.
model:
  # presumably the number of image-prefix tokens/embeddings — TODO confirm
  image_prefix_length: 64
  # presumably the maximum text-token length; 226 matches the CogVideoX
  # pipeline's default max_sequence_length — TODO confirm
  max_text_seq_length: 226
  # Checkpoint directory for the NVILA-8B-Video backbone.
  backbone_weight: "ckpt/NVILA-8B-Video/"
  # Checkpoint directory for the CogVideoX-2b base model.
  cogvideo_weight: "ckpt/CogVideoX-2b"
  add_reference_image: true
  # presumably how many trailing frames of the previous clip are reused as
  # context — TODO confirm
  last_clip_frame_num: 1
  # presumably the probability of dropping the condition (classifier-free
  # guidance training) — TODO confirm
  uncond_prob: 0.05
  pretrained_weight: "ckpt/AnimeShooterGen/pretrained.bin"
  # The file name embeds the same ID as the top-level video_id; presumably the
  # two must be switched together when targeting another video — verify.
  cogvideo_lora_weight: "ckpt/AnimeShooterGen/cogvideo_lora_1dCd6hCRoaQ.bin"
  # Alternative (disabled): LoRA weights taken from a checkpoint directory
  # under outputs/weights/ instead of ckpt/.
  # cogvideo_lora_weight: "outputs/weights/1dCd6hCRoaQ/checkpoint-2000/pytorch_model.bin"
  qformer_num_hidden_layers: 12

# LoRA adapter settings, one sub-section per adapted model.
# NOTE(review): the key names mirror Hugging Face PEFT's LoraConfig fields;
# presumably each sub-section is passed through to it — confirm in the loader.
peft:
  # Adapter for the language-model backbone.
  llm:
    enabled: true
    r: 8
    lora_alpha: 16
    lora_dropout: 0.1
    task_type: "CAUSAL_LM"
  # Adapter for the CogVideo model.
  cogvideo:
    enabled: true
    # NOTE(review): here lora_alpha < r (8 vs 16), the opposite of the llm
    # section (16 vs 8). With the usual lora_alpha / r scaling that is 0.5 vs
    # 2.0. The values must match those the LoRA checkpoint was trained with —
    # confirm this is intentional.
    r: 16
    lora_alpha: 8
    init_lora_weights: true
    # Module names that receive LoRA layers; they look like the attention
    # query/key/value/output projections — verify against the model.
    target_modules: ["to_q", "to_k", "to_v", "to_out.0"]

# Video generation / sampling settings.
# NOTE(review): presumably forwarded as keyword arguments to the video
# generation call (and fps to the video writer) — confirm in the consumer.
video_kwargs:
  # Number of frames per generated clip.
  num_frames: 49
  # Output resolution (presumably in pixels): 720 wide by 480 high.
  width: 720
  height: 480
  num_inference_steps: 50
  guidance_scale: 6.0
  # NOTE(review): 49 frames at 16 fps is roughly a 3-second clip; the
  # CogVideoX-2b model card cites 8 fps — confirm 16 is intended.
  fps: 16