# Top-level run selectors.
# NOTE(review): presumably CASE names the project/package and ACTION the
# entry point to dispatch to — confirm against the launcher script.
CASE: amino_GNN
ACTION: train_ec50_regression_masked_pmap
# ----- Model config -----:
# NOTE(review): presumably looked up by name to pick the model class — confirm.
MODEL_NAME: ASMI
# Atom-level feature names.
# NOTE(review): these look like RDKit atom accessor names and presumably
# become node features in this order — confirm before reordering or editing.
ATOM_FEATURES:
  - AtomicNum
  - ChiralTag
  - Hybridization
  - FormalCharge
  - NumImplicitHs
  - ExplicitValence
  - Mass
  - IsAromatic
# Bond-level feature names.
# NOTE(review): presumably become edge features in this order — confirm.
BOND_FEATURES:
  - BondType
  - Stereo
  - IsAromatic
# NOTE(review): presumably the size of the model's output layer; what the two
# outputs represent is not visible in this file — confirm.
OUT_FEATURES: 2
# null: no checkpoint path is given.
# NOTE(review): presumably means "start from freshly initialised weights".
RESTORE_FILE: null
# ----- HuggingFace -----:
# Absolute, machine-specific path.
# NOTE(review): presumably passed as the HuggingFace download/cache directory;
# adjust when running outside the /mnt layout used here.
HUGGINGFACE_CACHE_DIR: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/.cache
# ----- Graph config -----:
# Line-graph construction is switched off.
LINE_GRAPH: false
# NOTE(review): presumably only consulted when LINE_GRAPH is true — confirm.
LINE_GRAPH_MAX_SIZE_MULTIPLIER: 5
# Self-loops are switched off.
SELF_LOOPS: false
# ----- Loader config -----:
# NOTE(review): "tf" presumably selects a TensorFlow-based data pipeline — confirm.
LOADER_OUTPUT_TYPE: tf
CACHE: true
CACHE_SEQ_LOOKUP: true
# 16384 = 2**14.
# NOTE(review): presumably the number of examples held in the shuffle buffer.
SHUFFLE_BUFFER_SIZE: 16384
# ----- Run config -----:
# BATCH_SIZE, PADDING_N_EDGE and PADDING_N_NODE are one-element lists, not
# bare scalars; keep the list form when editing.
BATCH_SIZE:
  - 1024
# NOTE(review): ACTION ends in "pmap", so the batch is presumably split across
# N_PARTITIONS devices (1024 / 8 = 128 per partition) — confirm.
N_PARTITIONS: 8
# The padding sizes agree with the MOLS_CSV file name in this file
# (mols_nodeUPTO32_edgeUPTO64.csv); change them together.
PADDING_N_EDGE:
  - 64
PADDING_N_NODE:
  - 32
# 1280 is the hidden size of esm2_t33_650M_UR50D, the SEQ_MODEL_NAME set in
# this file; change both together when switching sequence models.
SEQ_EMBEDDING_SIZE: 1280
# NOTE(review): the SEQS_CSV name in this file ends in "upperInf"; confirm how
# sequences longer than SEQ_MAX_LENGTH are handled.
SEQ_MAX_LENGTH: 512
PYTABLE_FROM_DISK: false
# ----- Train config -----:
CLASS_ALPHA: null
# NOTE(review): with OPTION "adam_transformer" this is presumably a scale
# factor for a warmup-based schedule rather than a constant step size — confirm.
LEARNING_RATE: 0.0625
LOSS_OPTION: l2_loss
AUXILIARY_LOSS_OPTION: aux_MLM
# One-element list, not a bare scalar; keep the list form when editing.
N_EPOCH:
  - 1500
# Optimizer settings.
# NOTE(review): WARMUP_STEPS is named in steps while TRANSITION_EPOCHS is
# named in epochs — confirm the units the training code expects.
OPTIMIZATION:
  OPTION: adam_transformer
  WARMUP_STEPS: 6000
  TRANSITION_EPOCHS: 700
# NOTE(review): presumably a checkpoint is written every 20 epochs — confirm.
SAVE_FREQUENCY: 20
# ----- Logging config -----:
# Absolute, machine-specific path.
# NOTE(review): presumably the parent directory under which per-run log
# folders are created — confirm.
LOGGING_PARENT_DIR: /mnt/logs/ProtLig_GPCRclassA/ProtLig_GPCRclassA
# ----- Data config -----:
# NOTE(review): the relative paths below (MOLS_CSV, SEQS_CSV, DATACASE) are
# presumably resolved against DATA_PARENT_DIR — confirm in the loader.
TRAIN_CSV_NAME: data_train.csv
# Absolute path. It begins with the same directory as DATA_PARENT_DIR, so
# update both together if the data root moves.
H5FILE: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/amino_GNN/Data/m2or_conc_mixDiscard_20250501-165522/seqs/discard_by_length/PrecomputeESM2/esm2_t33_650M_UR50D.h5
H5FILE_TITLE: esm2_t33_650M_UR50D
SEQ_MODEL_NAME: esm2_t33_650M_UR50D
SEQ_MODEL_TOKENIZER_PATH: facebook/esm2_t33_650M_UR50D
MOLS_CSV: m2or_conc_mixDiscard_20250501-165522/mols/discard_by_list_20250501-165622/size_cut_SMILES_racemic/mols_nodeUPTO32_edgeUPTO64.csv
SEQS_CSV: m2or_conc_mixDiscard_20250501-165522/seqs/discard_by_length/seqs_lower296_upperInf.csv
DATACASE: m2or_conc_mixDiscard_20250501-165522/EC50_random_data/20250501-165825
DATA_PARENT_DIR: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/amino_GNN/Data
LABEL_COL: responsive
# Auxiliary label/weight columns are set to null here.
# NOTE(review): the commented-out entries below presumably show the list
# form to use when enabling them — confirm.
AUXILIARY_LABEL_COLS: null
#   - Pyrfume_values
AUXILIARY_WEIGHT_COLS: null
#   - Pyrfume_weight
MOL_COL: SMILES_racemic
MOL_ID_COL: mol_id
MOL_GLOBAL_COLS: null
SEQ_COL: mutated_sequence
SEQ_ID_COL: seq_id
# Lists the same column that SEQ_COL names.
SEQ_GLOBAL_COLS:
  - mutated_sequence
# NOTE(review): the leading underscore suggests a sentinel rather than a real
# CSV column, presumably asking the loader to compute class weights — confirm.
WEIGHT_COL: _calculate_class_weight
# ----- EC50 regression config -----:
# NOTE(review): the three *_COL keys are presumably column names in the
# training CSV — confirm against the dataset class.
CONC_PARAMETER_COL: parameter
CONC_VALUE_COL: value
CONC_VALUE_SCREEN_COL: value_screen
# NOTE(review): presumably class names resolved by the loader; both refer to
# precomputed embeddings, consistent with H5FILE being set in this file.
ELEMENT_TYPE: AminoElementPrecomputeMasked
DATASET_TYPE: AminoEC50RegressionDatasetPrecompute
# NOTE(review): presumably the EC50 target assigned to non-active pairs; the
# unit/scale is not visible in this file — confirm.
NON_ACTIVE_EC50_VALUE: 3.0