# Dotted path of the model class this config describes
# (presumably instantiated through Hydra's `_target_` mechanism; confirm against the loader).
_target_: src.models.nn.ProteinTransformerAF3

use_compile: false
finetune: 0
# Which output feature to return: 'seq' or 'pair'.
out_feat_to_return: 'seq'
# Index of the layer to return; -1 means the last layer.
return_layer_idx: -1
# Value used for t when t is missing in the input.
t_value_if_missing: 1

# Architecture parameters
# Whether to use a residual connection in the MHA.
residual_mha: true
# Whether to use a residual connection in the transition.
residual_transition: true
# true: compute MHA and transition in parallel and add them up (AF3 style).
# false: compute them sequentially (normal transformers).
parallel_mha_transition: false
# Whether to bias attention using a bias coming from a pair representation.
use_attn_pair_bias: true

# true: raise an error if some feature is not provided.
# false: fill missing features with default values (e.g. chain break with
# zero, residue sequence index by [0, 1, 2, ...], etc).
strict_feats: true

# Parameters for the features we extract (both for the sequence representation and the conditioning vector)
# Set this to the path of your pdb_cath dataset directory.
cath_code_dir: ${paths.project_data_dir}/proteina/pdb_raw/
multilabel_mode: 'sample'

# Parameters for the pair features we extract
# Binning for the pair distances of noisy xt
# (`*_dim` is presumably the number of bins between `*_min` and `*_max`; confirm against the feature code)
xt_pair_dist_dim: 64
xt_pair_dist_min: 0.1  # in nm (not Å)
xt_pair_dist_max: 3  # in nm (not Å)
# Binning for the pair distances for self conditioning
x_sc_pair_dist_dim: 128
x_sc_pair_dist_min: 0.1  # in nm (not Å)
x_sc_pair_dist_max: 3  # in nm (not Å)
# Relative sequence separation
seq_sep_dim: 127  # should be odd and >= 5
# The dimension of the final pair representation is set by `pair_repr_dim`, defined further down in this file

# Number of register tokens (presumably extra learned tokens added to the sequence; confirm against the model code).
num_registers: 10
# NOTE(review): the name suggests layer norm on queries/keys; confirm against the attention code.
use_qkln: true

# Number of buckets used when predicting pair outputs (exact semantics not visible in this file).
num_buckets_predict_pair: 64

# Dimension of the tokens in the sequence.
# NOTE(review): 512 is not a multiple of nheads (12); confirm how the per-head dimension is derived.
token_dim: 512
# Number of transformer layers.
nlayers: 12
# Number of attention heads.
nheads: 12
feat_extractor: true

# Parameters for the features we extract (both for the sequence representation and the conditioning vector)
t_emb_dim: 196  # dimension of the time embedding
dim_cond: 128  # dimension of the conditioning vector
idx_emb_dim: 196  # dimension of the sequence position ([0, 1, 2, ...] if residues are contiguous) embeddings
fold_emb_dim: 196  # dimension of the fold class embedding. This will be multiplied by three, as we have C, A, T embeddings.

# Whether to update the pair representation; automatically overridden to
# false if `use_attn_pair_bias` is false.
update_pair_repr: false
# Update the pair representation every n layers (only relevant if the pair
# representation is updated), e.g. with n = 3, 15 layers give 5 pair updates.
update_pair_repr_every_n: 3
# Whether to use triangular multiplication layers in the pair update;
# ignored if the pair representation is not updated.
use_tri_mult: false

# Checkpoint to load weights from; if none, train the model from scratch.
weights_path: ${paths.project_data_dir}/proteina/proteina_v1.3_DFS_60M_notri.ckpt

# Sequence features to include in the initial representation.
feats_init_seq:
  - "res_seq_pdb_idx"
  - "x_sc"
  - "chain_break_per_res"
# Sequence features to include in the conditioning variables.
feats_cond_seq:
  - "time_emb"
  - "fold_emb"

# Features to include in the pair representation.
feats_pair_repr:
  - "rel_seq_sep"
  - "x_sc_pair_dists"
  - "xt_pair_dists"
# Features to include in the pair representation conditioning.
feats_pair_cond:
  - "time_emb"

# Dimension of the final pair representation.
pair_repr_dim: 196