# Config composition list (Hydra-style `defaults`; assumed from the `_self_` keyword — confirm).
# NOTE(review): `_self_` is listed first, so under Hydra's merge order the entries below it
# take precedence over values set in this file wherever keys collide. Confirm that this is
# intended, e.g. for any keys that `backbone_transformer` may also define.
defaults:
  - _self_
  - backbone_transformer
  - pretrained_struct_model: pretrained_proteina_60M_no_tri

# NOTE(review): this flag has no description; presumably it switches the model into a
# feature-extractor mode — confirm against the code that reads it.
feat_extractor: True

# Transformer size.
# NOTE(review): token_dim (128) is not an integer multiple of nheads (6); confirm how the
# attention module derives the per-head dimension.
token_dim: 128  # dimension of the tokens in the sequence
nlayers: 3  # number of transformer layers
nheads: 6  # number of attn heads

# Parameters for the features we extract (both for the sequence representation and the conditioning vector)
t_emb_dim: 196  # dimension of the time embedding
dim_cond: 128  # dimension of conditioning vector
idx_emb_dim: 196  # dimension of the sequence position [0, 1, 2, ...] (if contiguous residues) embeddings
fold_emb_dim: 196  # dimension of fold class embedding. This will be multiplied by three, as we have C, A, T embeddings.

# Pair-representation update settings.
update_pair_repr: False  # whether to update pair representation, automatically overridden to False if `use_attn_pair_bias: False`
# Update the pair representation every n layers, e.g. for 15 layers we get 5 pair updates
# (only if updating the pair representation is enabled). Note the example uses 15 layers,
# whereas this file sets `nlayers: 3`.
update_pair_repr_every_n: 3
use_tri_mult: False  # whether to use triangular multiplication layers in pair update, ignored if not updating pair representation

# No weights path is configured in this file (null); presumably supplied by an override when needed — confirm.
weights_path: null

# Sequence features to include in initial representation
feats_init_seq:
  - "res_seq_pdb_idx"
  - "struct_emb_proteina"
# Sequence-level conditioning features (presumably feed the conditioning vector — confirm)
feats_cond_seq:
  - "lag_emb"
  - "seq_emb_esm3"
  - "temp_emb"
  - "deepseek_classification_emb"
  - "deepseek_confidence_emb"

# Features to include in the pair representation
feats_pair_repr:
  - "rel_seq_sep"
  - "xt_pair_dists"
# Features to include in the pair representation conditioning (none configured)
feats_pair_cond: []

# Embedding sizes and ranges. The key names suggest these pair up with the feature names in
# `feats_init_seq` / `feats_cond_seq` (e.g. `lag_emb_dim` with "lag_emb") — confirm in the
# feature-construction code.
sequence_emb_dim: 2560
structure_emb_dim: 512
lag_emb_dim: 32
max_lag: 200
lag_emb_max_positions: 200

# Settings for the `temp_*` embedding.
# NOTE(review): whether `temp` means temperature, and the unit of temp_min / temp_max,
# is not stated here — confirm.
temp_emb_dim: 16
temp_max: 500
temp_min: 250
temp_emb_max_positions: 5

deepseek_classification_emb_dim: 4
deepseek_confidence_emb_dim: 4

# Dimension of the pair representation (assumed from the key name — confirm).
pair_repr_dim: 128