# Inference settings, all grouped under the top-level `args` mapping.
args:
  # Commented-out option; uncomment the line below to set it:
  # true for image2video, false for text2video.
  # image2video: true
  latent_channels: 16
  mode: inference
  # Replace the placeholder with your CogVideoX SAT folder. This path is for
  # the full model without a LoRA adapter.
  load: "{your CogVideoX SAT folder}/transformer"
  batch_size: 1
  input_type: txt
  input_file: configs/test.txt
  # Remove this key for I2V.
  sampling_image_size: [768, 1360]
  # Use 42 for 10 seconds and 22 for 5 seconds.
  sampling_num_frames: 22
  sampling_fps: 16
  bf16: true
  output_dir: outputs
  force_inference: true
