# This configuration should allow you to train Wan 14b t2v on 512x512x81 sized videos (or varying aspect ratios of the same size), with 24GB VRAM.

# change this
output_dir = '/data/diffusion_pipe_training_runs/tmp'
# and this
dataset = '/home/anon/code/diffusion-pipe-configs/datasets/wan/video.toml'

# training settings
epochs = 1000
micro_batch_size_per_gpu = 1
pipeline_stages = 1
gradient_accumulation_steps = 1
gradient_clipping = 1
warmup_steps = 10

# eval settings
eval_every_n_epochs = 1
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

# misc settings
save_every_n_epochs = 5
checkpoint_every_n_minutes = 120
activation_checkpointing = 'unsloth'
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
video_clip_mode = 'single_beginning'
blocks_to_swap = 32

[model]
type = 'wan'
ckpt_path = '/data2/imagegen_models/Wan2.1-T2V-14B'
dtype = 'bfloat16'
transformer_dtype = 'float8'
timestep_sample_method = 'logit_normal'

[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'

[optimizer]
type = 'AdamW8bitKahan'
lr = 2e-5
betas = [0.9, 0.99]
weight_decay = 0.01
stabilize = false
