# Defaults list: each `group: option` entry selects a config to compose into
# this one. Entry order is significant.
# NOTE(review): presumably a Hydra defaults list — with `_self_` first, this
# file's own keys are merged before the groups below, so those groups can
# override values set here. Confirm that precedence is intended.
defaults:
    - _self_
    - trainer: base
    - lightning: base
    - data: remote_homology
    - optimization: base
    - autorestart: base
# Settings for the model selected by `class_name`.
model:
    class_name: 'SequenceClassificationModel'
    # Where to load the pretrained pLM from.
    pretrained_ckpt_path: ''
    # Set to "auto" to resume training from a checkpoint in the same default dir.
    ckpt_path: null
    num_labels: 45
    # Dropout probability in SequenceClassificationHead.
    dropout: 0.1
    # Increase to get a more complex MLP classifier.
    num_layer: 1
    # Classifier hidden dim, i.e., the shape of the last logits before entering softmax.
    # NOTE(review): wording kept from the original comment — confirm it matches the head.
    hidden_size: 512
    num_tokens: null
    # Hidden dimension, taken from the tokenizer embedding dim.
    d_model: null
    # "global" for whole-protein property prediction;
    # "local" for residue-wise property prediction.
    is_global_or_local: global
    multi_label: false
    regression: false
    use_sequence: false
    sequence_only: false
    add_noise: null
deepspeed_path: 'src/script/config/deepspeed/32_stage2.json'
default_data_dir: null
tokenizer: null
tokenizer_pretrained_ckpt_path: null
tokenizer_ckpt_name: null
max_steps: 10000
# Used for tensorboard.
experiment_name: 'test'
run_name: 'test_run'
validate_only: false
test_only: false

# Left empty here; filled in at run time.
save_dir_path: ''
# "cuda" or "cpu": where to put the tokenizer.tokenizer_encoder.
tokenizer_device: 'cuda'
# If true, tokenization results are precomputed and cached when setting up
# the datamodule.
precompute_tokens: true
quantizer_use_linear_project: false
model_encoder_dmodel: 1024
model_encoder_nlayers: 2
model_encoder_vheads: 128
quantizer_codebook_size: 4096
quantizer_codebook_embed_size: 128
model_encoder_dout: 128