# Run configuration. ACTION names the routine to execute and CASE the
# project/case it belongs to (presumably both are dispatched on by the
# launcher script — confirm against the caller).
CASE: amino_GNN
ACTION: train_conc_masked_pmap
# ----- Model config -----:
MODEL_NAME: ASMI_DR
# Per-atom descriptors used as node features. The names look like RDKit atom
# properties — TODO confirm against the featurizer that consumes this list.
ATOM_FEATURES:
  - AtomicNum
  - ChiralTag
  - Hybridization
  - FormalCharge
  - NumImplicitHs
  - ExplicitValence
  - Mass
  - IsAromatic
# Per-bond descriptors used as edge features (same caveat as the atom list).
BOND_FEATURES:
  - BondType
  - Stereo
  - IsAromatic
# NOTE(review): presumably the width of the model's output layer; 2 would be
# consistent with a binary label and LOSS_OPTION cross_entropy — confirm.
OUT_FEATURES: 2
# null: no checkpoint path is supplied in this config.
RESTORE_FILE: null
# ----- HuggingFace -----:
# Absolute, machine-specific path (under /mnt). Presumably where downloaded
# HuggingFace assets such as the tokenizer are cached — confirm; adjust when
# running on a different host.
HUGGINGFACE_CACHE_DIR: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/.cache
# ----- Graph config -----:
LINE_GRAPH: false
# NOTE(review): presumably only consulted when LINE_GRAPH is true — confirm.
LINE_GRAPH_MAX_SIZE_MULTIPLIER: 5
SELF_LOOPS: false
# ----- Loader config -----:
# NOTE(review): "tf" presumably selects a TensorFlow-style loader output —
# confirm against the loader implementation.
LOADER_OUTPUT_TYPE: tf
CACHE: true
CACHE_SEQ_LOOKUP: true
# 16384 = 2**14. Units are presumably examples held in the shuffle buffer —
# confirm.
SHUFFLE_BUFFER_SIZE: 16384
# ----- Run config -----:
# BATCH_SIZE, PADDING_N_EDGE and PADDING_N_NODE are each one-element lists
# (N_EPOCH in the train section has the same shape). Presumably entries are
# matched by index across these lists — confirm before adding more entries.
BATCH_SIZE:
  - 1024
# NOTE(review): if the batch is split evenly across partitions, 1024 / 8 gives
# 128 examples per partition — confirm how the trainer divides the batch.
N_PARTITIONS: 8
# 64 edges / 32 nodes match the "nodeUPTO32_edgeUPTO64" suffix in the MOLS_CSV
# file name; keep them in sync if the molecule file changes.
PADDING_N_EDGE:
  - 64
PADDING_N_NODE:
  - 32
# 1280 is the hidden size of the ESM-2 t33 650M model named in SEQ_MODEL_NAME.
SEQ_EMBEDDING_SIZE: 1280
# NOTE(review): the SEQS_CSV file name ends in "upperInf", i.e. no upper
# length bound in the name; confirm how sequences longer than 512 are handled.
SEQ_MAX_LENGTH: 512
PYTABLE_FROM_DISK: false
# ----- Train config -----:
CLASS_ALPHA: null
# 0.0625 = 1/16. NOTE(review): large for a raw Adam step size; with
# OPTIMIZATION.OPTION adam_transformer it is presumably a scale factor applied
# to a warmup schedule rather than the step size itself — confirm.
LEARNING_RATE: 0.0625
LOSS_OPTION: cross_entropy
AUXILIARY_LOSS_OPTION: aux_MLM
# One-element list, same shape as BATCH_SIZE.
N_EPOCH:
  - 1500
OPTIMIZATION:
  OPTION: adam_transformer
  WARMUP_STEPS: 6000
  # 700 of the 1500 epochs configured in N_EPOCH; what happens at the
  # transition is not visible from this file.
  TRANSITION_EPOCHS: 700
# Units (epochs vs. steps) are not stated here — TODO confirm.
SAVE_FREQUENCY: 20
LOG_IMAGES_FREQUENCY: 20
# ----- Logging config -----:
# Absolute, machine-specific path (under /mnt); adjust when running on a
# different host.
LOGGING_PARENT_DIR: /mnt/logs/ProtLig_GPCRclassA/ProtLig_GPCRclassA
# ----- Data config -----:
TRAIN_CSV_NAME: data_train.csv
# Absolute path to the precomputed sequence-embedding file. It lies under
# DATA_PARENT_DIR, inside the same dataset directory used by the relative
# paths in this section.
H5FILE: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/amino_GNN/Data/m2or_conc_mixDiscard_20250501-165522/seqs/discard_by_length/PrecomputeESM2/esm2_t33_650M_UR50D.h5
H5FILE_TITLE: esm2_t33_650M_UR50D
SEQ_MODEL_NAME: esm2_t33_650M_UR50D
SEQ_MODEL_TOKENIZER_PATH: facebook/esm2_t33_650M_UR50D
# MOLS_CSV, SEQS_CSV and DATACASE are relative paths; presumably they are
# resolved against DATA_PARENT_DIR — confirm against the loader.
MOLS_CSV: m2or_conc_mixDiscard_20250501-165522/mols/discard_by_list_20250501-165622/size_cut_SMILES_racemic/mols_nodeUPTO32_edgeUPTO64.csv
SEQS_CSV: m2or_conc_mixDiscard_20250501-165522/seqs/discard_by_length/seqs_lower296_upperInf.csv
DATACASE: m2or_conc_mixDiscard_20250501-165522/EC50_random_data/20250501-165818
DATA_PARENT_DIR: /mnt/ProtLig_GPCRclassA/ProtLig_GPCRclassA/amino_GNN/Data
LABEL_COL: responsive
# Auxiliary label columns are disabled (null). Commented-out list alternative
# (replace the null with the list to enable it):
#   - Pyrfume_values
AUXILIARY_LABEL_COLS: null
# Auxiliary weight columns are disabled (null). Commented-out list alternative
# (replace the null with the list to enable it):
#   - Pyrfume_weight
AUXILIARY_WEIGHT_COLS: null
MOL_COL: SMILES_racemic
MOL_ID_COL: mol_id
MOL_GLOBAL_COLS: null
SEQ_COL: mutated_sequence
SEQ_ID_COL: seq_id
# Lists the same column as SEQ_COL.
SEQ_GLOBAL_COLS:
  - mutated_sequence
WEIGHT_COL: null
# ----- EC50 sampling config -----:
CONC_PARAMETER_COL: parameter
ELEMENT_TYPE: AminoConcentrationElementPrecomputeMasked
DATASET_TYPE: AminoConcentrationDatasetMeasurementsSamplingPrecompute
# Measurement types to include. Options noted by the author: ec50_nd,
# ec50_greater_than, ec50, screening. Only `screening` is enabled; the other
# three are kept commented out.
INCLUDE_CONC_PARAMETER_LIST:
#   - ec50_nd
#   - ec50_greater_than
#   - ec50
  - screening
CONCENTRATION_SAMPLER_TYPE: LabelSampler
N_EC50_COPIES: 10
CONC_VALUE_COL: value
CONC_VALUE_SCREEN_COL: value_screen
# NOTE(review): with only `screening` included above, the EC50_* and
# EC50_ND_* settings below presumably have no effect on this run — confirm.
# Units of the margins are not stated in this file.
EC50_STD_MULTIPLIER: null
EC50_LOWER_MARGIN: 0.25
EC50_UPPER_MARGIN: 0.25
EC50_LOWER_EXTREME: null
EC50_UPPER_EXTREME: null
EC50_GREATER_THAN_LOWER_MARGIN: 0.25
EC50_GREATER_THAN_LOWER_EXTREME: null
# Asymmetric: the screening margin is 0.25 on the lower side and 0.0 on the
# upper side.
SCREENING_LOWER_MARGIN: 0.25
SCREENING_UPPER_MARGIN: 0.0
SCREENING_LOWER_EXTREME: null
SCREENING_UPPER_EXTREME: null
EC50_ND_LOWER_EXTREME: null
EC50_ND_UPPER_EXTREME: null
# ----- Monotonicity config -----:
# All four settings are null in this config.
MONOTONICITY_SLOPE_POS: null
MONOTONICITY_SLOPE_NEG: null
MONOTONICITY_EPS_POS: null
MONOTONICITY_EPS_NEG: null
# ----- uniform concentration sampler config -----:
# NOTE(review): CONCENTRATION_SAMPLER_TYPE in this file is LabelSampler;
# confirm whether these uniform-sampler settings are read in that case.
SAMPLING_REGION_LOWER_BOUND: -5.01
SAMPLING_REGION_UPPER_BOUND: 1.01
# NOTE: The highest "unknown case" weight equals (1/2)*batch_size (i.e. when
# there's a single example per batch).
# Author's open question, left unresolved: "10/512?" (possibly an alternative
# value for this scale).
UNKNOWN_CASE_SAMPLE_WEIGHT_SCALE: 1.0