# ==== run settings ====

# NOTE(review): presumably a parameter of the synthetic (dummy) data
# distribution -- confirm against the consumer.
zipf_offset: 5

# output / profiling directories
outputs: "exp/opensora/baseline"
profile_path: "exp/opensora/profile/baseline"

# NOTE(review): presumably the sequence-parallel group size -- confirm.
sp_size: 4

# dummy dataset switch and its size
dummy_dataset: true
dummy_data_size: 5000

# diagnostics
verbose: true
calculate_imbalance: true


# ==== training config ====

# preprocessed embedding input
data_path: "./assets/example_data/demo_preprocess.csv"
preprocessed_data: true
drop_last: true

# train
ckpt_path: "hpcai-tech/OpenSora-STDiT-v3"
# canonical lowercase boolean, matching the other flags in this file
grad_checkpoint: true
num_workers: 8
dtype: "bf16"

# log
seed: 42
epochs: 1
# Written with an explicit decimal point and a signed exponent so that
# YAML 1.1 loaders (e.g. PyYAML) resolve it as a float rather than the
# string "1e10". The numeric value is unchanged.
log_every: 1.0e+10

# optimization
grad_clip: 1.0
# Exponent floats are written with an explicit decimal point so that
# YAML 1.1 loaders (e.g. PyYAML) resolve them as floats rather than
# strings. The numeric values are unchanged.
lr: 1.0e-8
ema_decay: 0.99
adam_eps: 1.0e-15
warmup_steps: 10

# data
# image_mixing_frac: 50
num_bucket_build_workers: 16

# One entry per resolution; each maps an integer key to a two-element list.
# NOTE(review): presumably {num_frames: [keep_probability, batch_size]} --
# confirm against the bucket consumer.
bucket_config:
  "144p":
    1: [1.0, 345]
    51: [1.0, 48]
    102: [1.0, 25]
    204: [1.0, 12]
    408: [1.0, 6]
  "240p":
    1: [1.0, 128]
    51: [1.0, 16]
    102: [1.0, 8]
    204: [1.0, 4]
    408: [1.0, 2]
  "360p":
    1: [1.0, 64]
    51: [1.0, 7]
    102: [1.0, 4]
    204: [1.0, 2]
    408: [1.0, 1]
  "480p":
    1: [1.0, 32]
    51: [1.0, 4]
    102: [1.0, 2]
    204: [1.0, 1]
    408: [1.0, 1]
  "720p":
    1: [1.0, 14]
    51: [1.0, 1]
    102: [1.0, 1]
    204: [1.0, 1]
    408: [1.0, 1]

# Override the default common aspect ratios (ar).
# For benchmarking, a single aspect ratio is used for every resolution;
# otherwise the data would be too sparse.
# Each "0.56" key is approximately first/second of its pair
# (e.g. 144 / 256 = 0.5625).
# NOTE(review): the pairs are presumably [height, width] -- confirm.
common_ar:
  "144p":
    "0.56": [144, 256]
  "240p":
    "0.56": [240, 426]
  "360p":
    "0.56": [360, 640]
  "480p":
    "0.56": [480, 854]
  "720p":
    "0.56": [720, 1280]

# mask
# Ratio assigned to each mask type; the listed values add up to 0.25.
# NOTE(review): the key "intepolate" looks like a misspelling of
# "interpolate", but it is a runtime key and presumably has to match the
# consumer's spelling -- confirm before renaming.
mask_ratios:
  "random": 0.01
  "intepolate": 0.002
  "quarter_random": 0.002
  "quarter_head": 0.002
  "quarter_tail": 0.002
  "quarter_head_tail": 0.002
  "image_random": 0.0
  "image_head": 0.22
  "image_tail": 0.005
  "image_head_tail": 0.005
