# Target model selection.
model:
    model_id: 'flux'
    # NOTE(review): presumably the number of text tokens in the sequence —
    # confirm against the code that consumes this config.
    n_text_tokens: 512
# Offline sparse-plan settings. Every child key below is commented out, so
# `sparse_plan` currently parses as null; uncomment a sub-section to enable it.
sparse_plan:  # the offline sparse_plan
    # empty:
    #     max_threshold: 5.e-3
    # permute:
    #     max_threshold: 1.e-3
    #     sparse_percentage: 0.9
    #     # block_size_compensation: 0.9
    # sparse:
    #     sparse_type: "threshold"   # ['threshold', 'mean']
    #     sparse_block_size: 2  # the base block_sparse_size is [17K//13, 17K//6, 17K//9]
    #     # only pair with online, override the sparse_block_size in sparse_plan.
    #     # (!) it is the actual block_size, without considering attn_ds_rate, since it does not relate to the downsampled offline calib_data
    #     online_sparse_block_size: 128
    #     max_threshold: 2.e-3
    #     sum_threshold: 5.e-2
    #     block_sum_k: 5
    #     # block_sum_ratio: 0.25
# Calibration-data settings: which tensors are enabled (qkv / attn_map) and
# the `ds` (downsample) type and rate applied to each.
calib_data:
    # save_path: "attn_155_t10_promp_2b"
    qkv: false
    attn_map: true
    # Allowed values for attn_ds_type: 'part', 'reduce_max'.
    attn_ds_type: 'part'
    attn_ds_rate: [1, 4, 4]
    qkv_ds_rate: 4
    # Allowed values for qkv_ds_type: 'part', 'reduce_max'.
    qkv_ds_type: 'part'
# Regex alternation over module names. Single-quoted so any future backslash
# escapes stay literal.
# NOTE(review): presumably modules matching this pattern remain in floating
# point — confirm. If the consumer uses substring search, the bare `norm`
# alternative already covers norm1 / norm2 / norm_out.
remain_fp_regex: 'patch_embed|time_embedding|norm_out|proj_out|norm1|norm2|timestep_embedder|norm|linear_1|linear_2'
#weight:
    #n_bits: 8
    #sym: False
#act:
    #n_bits: 8
    #sym: True
# Attention settings. Only `head_split_num` is active; the `sparse` and
# quantization (qk / v / attn_map) sub-sections below are commented-out
# templates.
attn:
    # exp_of_two_softmax: False
    head_split_num: 6   # 8 for sparse_only
    # sparse:
        # plan: './configs/sparse_plan/debug.pth'
        # reorder: True
        #block_sparse: True
        #online: True
        #empty_head: True
        # NOTE(review): the key below is spelled "procssor" — confirm it matches
        # the name the consumer reads before enabling it.
        #empty_head_procssor: 'zero'  # ['zero','uniform']
    # -----------------------------
    # qk:
    #     n_bits: 8
    #     sym: True
    # v:
    #     n_bits: 4
    #     sym: True
    # attn_map:  # V*attn_map_post_softmax quantized
    #     n_bits: 4
    #     sym: True
    #     # NOTE(review): the commented sparse_plan template sets
    #     # online_sparse_block_size to 128, not 64 — confirm which is intended.
    #     block_size: 64  # usually the same as "online_sparse_block_size"
    #     group: 'block'
    # -----------------------------