# Run identity: which project case this config belongs to and which action to run.
# NOTE(review): the action name suggests evaluating saved EC50-regression
# checkpoints on masked inputs — confirm against the action dispatcher.
CASE: amino_GNN
ACTION: eval_ec50_regression_masked_ckpts
# ----- Model config -----:
# Name of the model to build.
MODEL_NAME: ASMI
# Atom-level feature names (8 entries).
# NOTE(review): names look like RDKit atom properties — confirm against the featurizer.
ATOM_FEATURES:
  - AtomicNum
  - ChiralTag
  - Hybridization
  - FormalCharge
  - NumImplicitHs
  - ExplicitValence
  - Mass
  - IsAromatic
# Bond-level feature names (3 entries).
BOND_FEATURES:
  - BondType
  - Stereo
  - IsAromatic
# NOTE(review): presumably the width of the model output — confirm what the two outputs are.
OUT_FEATURES: 2
# Absolute path to restore the model from. It lies under LOGGING_PARENT_DIR and
# embeds the same run ids as DATACASE
# (m2or_conc_mixDiscard_20250501-165522/EC50_random_data/20250501-165828),
# so update these keys together when switching runs.
RESTORE_MODEL_DIR: /mnt/logs/ProtLig_GPCRclassA/ProtLig_GPCRclassA/m2or_conc_mixDiscard_20250501-165522/EC50_random_data/20250501-165828/Model_v20250214_M/93433
# ----- HuggingFace -----:
# Absolute path of the HuggingFace cache directory.
# NOTE(review): presumably where files for SEQ_MODEL_TOKENIZER_PATH are cached — confirm.
HUGGINGFACE_CACHE_DIR: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/.cache
# ----- Graph config -----:
# Line-graph construction is switched off.
LINE_GRAPH: false
# NOTE(review): presumably only consulted when LINE_GRAPH is true — confirm.
LINE_GRAPH_MAX_SIZE_MULTIPLIER: 5
# Self-loops are switched off.
SELF_LOOPS: false
# ----- Loader config -----:
# NOTE(review): "tf" presumably selects TensorFlow-style loader output — confirm.
LOADER_OUTPUT_TYPE: tf
# Caching switches for the loader and for the sequence lookup.
CACHE: true
CACHE_SEQ_LOOKUP: true
# 1536 = 12 x BATCH_SIZE (128); revisit if BATCH_SIZE changes.
SHUFFLE_BUFFER_SIZE: 1536
# ----- Run config -----:
BATCH_SIZE: 128
# NOTE(review): meaning of 0 (no partitioning vs. auto) is not visible here — confirm.
N_PARTITIONS: 0
# Padding sizes match the limits encoded in the MOLS_CSV file name
# (mols_nodeUPTO32_edgeUPTO64.csv): 64 edges, 32 nodes. Keep them in sync.
PADDING_N_EDGE: 64
PADDING_N_NODE: 32
# NOTE(review): presumably the embedding width of SEQ_MODEL_NAME (esm2_t33_650M_UR50D) — confirm.
SEQ_EMBEDDING_SIZE: 1280
# Maximum sequence length.
# NOTE(review): whether longer sequences are truncated or rejected is not visible here.
SEQ_MAX_LENGTH: 512
PYTABLE_FROM_DISK: false
# ----- Train config -----:
# NOTE(review): ACTION is an eval action, so these training settings may be unused
# in this run and kept only to mirror the training config — confirm.
CLASS_ALPHA: null
LEARNING_RATE: 0.0625
# Main loss and auxiliary loss selectors.
# NOTE(review): an auxiliary loss is selected while AUXILIARY_LABEL_COLS is null —
# confirm that aux_MLM needs no label columns.
LOSS_OPTION: l2_loss
AUXILIARY_LOSS_OPTION: aux_MLM
N_EPOCH: 1500
# Optimizer/schedule selection with its parameters.
OPTIMIZATION:
  OPTION: adam_transformer
  # WARMUP_STEPS is counted in steps, TRANSITION_EPOCHS in epochs (per the key names).
  WARMUP_STEPS: 6000
  TRANSITION_EPOCHS: 700
# NOTE(review): unit (epochs vs. steps) is not visible here — confirm.
SAVE_FREQUENCY: 20
# ----- Logging config -----:
# Absolute parent directory for logs; RESTORE_MODEL_DIR is located underneath it.
LOGGING_PARENT_DIR: /mnt/logs/ProtLig_GPCRclassA/ProtLig_GPCRclassA
# ----- Data config -----:
# NOTE(review): the validation CSV is set to data_train.csv, i.e. evaluation would run
# on the training split — confirm this is intentional.
VALID_CSV_NAME: data_train.csv
# Absolute path to the precomputed-embedding HDF5 file (located under DATA_PARENT_DIR).
H5FILE: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/amino_GNN/Data/m2or_conc_mixDiscard_20250501-165522/seqs/discard_by_length/PrecomputeESM2/esm2_t33_650M_UR50D.h5
# H5FILE_TITLE, SEQ_MODEL_NAME and the tokenizer path all name the same model
# (esm2_t33_650M_UR50D); change them together.
H5FILE_TITLE: esm2_t33_650M_UR50D
SEQ_MODEL_NAME: esm2_t33_650M_UR50D
SEQ_MODEL_TOKENIZER_PATH: facebook/esm2_t33_650M_UR50D
# Relative paths.
# NOTE(review): presumably resolved against DATA_PARENT_DIR — confirm.
MOLS_CSV: m2or_conc_mixDiscard_20250501-165522/mols/discard_by_list_20250501-165622/size_cut_SMILES_racemic/mols_nodeUPTO32_edgeUPTO64.csv
SEQS_CSV: m2or_conc_mixDiscard_20250501-165522/seqs/discard_by_length/seqs_lower296_upperInf.csv
DATACASE: m2or_conc_mixDiscard_20250501-165522/EC50_random_data/20250501-165828
# Absolute root directory of the data tree.
DATA_PARENT_DIR: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/amino_GNN/Data
# Name of the label column.
LABEL_COL: responsive
# Auxiliary label/weight columns are disabled (null). The commented-out entries
# below are list items; to enable one, remove `null` and uncomment the item.
AUXILIARY_LABEL_COLS: null
#   - Pyrfume_values
AUXILIARY_WEIGHT_COLS: null
#   - Pyrfume_weight
# Molecule column names.
MOL_COL: SMILES_racemic
MOL_ID_COL: mol_id
MOL_GLOBAL_COLS: null
# Sequence column names.
SEQ_COL: mutated_sequence
SEQ_ID_COL: seq_id
# Sequence-level global columns; the single entry is the same column named by SEQ_COL.
SEQ_GLOBAL_COLS:
  - mutated_sequence
# No per-sample weight column is configured for validation.
VALID_WEIGHT_COL: null
# ----- EC50 regression config -----:
# Column names carrying the concentration parameter, its value and the screening value.
CONC_PARAMETER_COL: parameter
CONC_VALUE_COL: value
CONC_VALUE_SCREEN_COL: value_screen
# Class names selecting the element and dataset implementations.
ELEMENT_TYPE: AminoElementPrecomputeMasked
DATASET_TYPE: AminoEC50RegressionDatasetPrecompute
# NOTE(review): presumably the EC50 value assigned to non-active pairs — confirm.
# It lies above both MAX_CONC_SAMPLE (1.01) and EC50_DECISION_THRESHOLD (2.0).
NON_ACTIVE_EC50_VALUE: 3.0
# ----- Concentration range config -----:
# Lower and upper bounds of the sampled concentration range.
# NOTE(review): units are not visible here (presumably log-scale concentration) — confirm.
MIN_CONC_SAMPLE: -5.01
MAX_CONC_SAMPLE: 1.01
# Threshold sits between MAX_CONC_SAMPLE (1.01) and NON_ACTIVE_EC50_VALUE (3.0).
# NOTE(review): presumably predictions above it count as non-active — confirm direction.
EC50_DECISION_THRESHOLD: 2.0