# Template config for fine-tuning PLM on a custom dataset.

# Run-level settings. The meaning of each key is inferred from its name;
# confirm against the code that loads this config.
# `name` and the final path component of `dump_dir` are the same string here.
name: "plm_3b_sft"
dump_dir: ./plm_3b_sft
steps: 200
seed: 777
# Optimizer / learning-rate settings (meanings inferred from key names;
# confirm against the training code).
optim:
    # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
    # resolve a bare `4e-5` as a string rather than a float.
    lr: 4.0e-5
    # NOTE(review): 120 is more than half of the top-level `steps: 200`; if
    # both are counted in training steps, most of this run is warmup — confirm.
    warmup: 120
    lr_min_ratio: 0.01
    clip: 1.0

# Distributed-execution settings (meanings inferred from key names; confirm
# against the training code).
distributed:
    fsdp_type: full_shard
    compile: false
    model_dtype: bf16
    matmul_allow_tf32: false
    # Of the two activation-checkpointing switches, only the "full" one is
    # enabled in this template.
    selective_activation_checkpointing: false
    full_activation_checkpointing: true
    tp_size: 1

# Model architecture settings. Presumably these must match the checkpoint
# named by `checkpoint.init_ckpt_path` — TODO confirm.
model:
    dim: 3072
    n_layers: 28
    # 24 heads vs 8 KV heads (3:1) — presumably grouped-query attention; confirm.
    n_heads: 24
    n_kv_heads: 8
    vocab_size: 128256
    ffn_dim_multiplier: 1.0
    multiple_of: 256
    # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
    # resolve a bare `1e-05` as a string rather than a float.
    norm_eps: 1.0e-05
    rope_theta: 500000.0
    weight_tying: true
    rope_scale_factor: 32
    high_freq_factor: 4
    max_seqlen: 3072
    # Both freeze flags are off in this template.
    freeze_language_model: false
    freeze_vision_model: false
    pooling_ratio: 2
    vision_model:
        # Same value as `data.image_res` (448); presumably the two must stay
        # in sync — confirm.
        image_size: 448
        patch_size: 14
        width: 1024
        layers: 23
        heads: 16
        use_cls_token: true
        use_abs_posemb: true
        mlp_ratio: 4.0
        ls_init_value: 0.1
        drop_path: 0.1
        use_ln_post: false
        # This is the string "none", not a YAML null.
        pool_type: "none"
    mlp_init:
        use_gaussian: true

# Dataset / input-pipeline settings (meanings inferred from key names; confirm
# against the data-loading code).
data:
    # Comma-separated `<dataset_name>:<number>` entries. All eight entries here
    # are `dummy_*` names with the number 1 (presumably a sampling weight —
    # confirm). Presumably these are the placeholders to replace when adapting
    # this template to a custom dataset.
    datamix: dummy_image:1,dummy_multi_image:1,dummy_image_region:1,dummy_video:1,dummy_text:1,dummy_stc_RDCap:1,dummy_stc_RCap:1,dummy_stc_RTLoc:1
    num_workers: 4
    batch_size: 2
    # Same value as `model.vision_model.image_size` (448); presumably the two
    # must stay in sync — confirm.
    image_res: 448
    max_num_tiles: 36
    max_video_frames: 32
    vision_input_type: thumb+tile
    # Shares the `facebook/Perception-LM-3B` prefix with
    # `checkpoint.init_ckpt_path`.
    tokenizer_path: facebook/Perception-LM-3B/tokenizer.model
    tokenizer_name: plmchat
    conversation_format: plm_sft

# Profiling settings. `run` is false in this template (presumably this
# disables profiling — confirm).
profiling:
    run: false

# Checkpoint settings (meanings inferred from key names; confirm against the
# checkpointing code).
checkpoint:
    dump:
        # NOTE(review): 500 is larger than the top-level `steps: 200`; if
        # `every` is counted in training steps, no periodic dump fires during
        # this run — confirm, and lower it if intermediate checkpoints are needed.
        every: 500
        keep: 1
    # Shares the `facebook/Perception-LM-3B` prefix with `data.tokenizer_path`.
    init_ckpt_path: facebook/Perception-LM-3B
    # Canonical lowercase boolean, matching every other boolean in this file.
    is_consolidated_model: true

# Logging settings. `freq` presumably counts training steps between log
# lines — confirm against the training code.
logging:
    freq: 10
    level: INFO  # Available choices for logging level are: [NOTSET, DEBUG, INFO, WARN, ERROR, FATAL, CRITICAL]
