# Data paths
# The <...> values are placeholders; replace them with real paths before use.
results_folder: '<path to train results>'
# NOTE(review): a bare `None` is not YAML null; standard loaders read it as
# the string "None". Confirm what the consumer expects before changing it
# to `null`.
protein_data_path: None

saved_dataset_path_AFDB: '<path to train dataset>'
use_foldseek_sequences: true
add_foldseek_embeddings: false


# model
# Checkpoint identifier in `<org>/<name>` form.
esm_checkpoint: 'westlake-repl/SaProt_650M_AF2'
min_protein_length: 30
# NOTE(review): -1 presumably means "no upper bound" -- confirm in the loader.
max_protein_length: -1
num_struct_embeddings_layers: 1
use_struct_embeddings: true
# The string 'none' (quoted on purpose); presumably disables contact
# prediction -- confirm against the consumer.
predict_contacts: 'none'
predict_angles: false
mask_angle_inputs_with_plddt: true

# batching
use_sorted_batching: true
# NOTE(review): unit of this limit (tokens? residues?) is not visible here.
batch_limit: 7000

# training
num_train_epochs: 10
train_batch_size: 1
eval_batch_size: 1
logging_steps: 400
eval_steps: 5000
lr_scheduler_type: 'constant'
warmup_ratio: 0.0
# 5e-6, kept in plain decimal form: PyYAML (YAML 1.1) would load `5e-6`
# as a string rather than a float.
learning_rate: 0.000005

# trainer
train_split: 'train'
lr_decrease_ratio: 0.2

# NOTE(review): a bare `None` is not YAML null; standard loaders read it as
# the string "None". Confirm what the consumer expects before changing it
# to `null`.
trained_adapter_name: None
