# config/config.yaml
# Model configuration
model:
  # Model and tokenizer identifiers; both currently point at the same ID.
  name: 'yerevann/chemlactica-1.3b'
  tokenizer: 'yerevann/chemlactica-1.3b'
  # NOTE(review): an earlier inline "#6" suggests 6 was a previous or
  # alternate value for this index — confirm before changing models.
  layer_idx: 12

# CBM configuration
cbm:
  num_concepts: 2
  # NOTE(review): an earlier inline "#768" suggests 768 was a previous or
  # alternate value — presumably tied to the chosen model; confirm.
  concept_dim: 2048
  # Equals the number of entries under data.property_columns in this file.
  num_properties: 2

# Training configuration
training:
  save_every_n_batches: 5000
  checkpoint_dir: './checkpoints/chemlactica1p3B/tdc/'
  # Keep the dot in the mantissa: YAML 1.1 loaders (e.g. PyYAML) resolve a
  # bare `1e-3` as the string "1e-3" rather than a float.
  learning_rate: 1.0e-3
  num_epochs: 10
  # presumably the weight of an orthogonality regularization term —
  # TODO confirm against the training code
  lambda_ortho: 0.1

# NOTE(review): these two keys live at the top level, not under `training:` —
# confirm that is where the loader looks for them.
# Boolean written in canonical lowercase form (`true`/`false`).
resume_from_checkpoint: true
# NOTE(review): the leading "PATH/" looks like a placeholder for a real
# root directory — confirm it is substituted before use.
resume_checkpoint_path: 'PATH/concept_representation_alignment/checkpoints/chemlactica1p3B/tdc/epoch_6/checkpoint_batch_14999.ckpt'

# Processing parameters
batch_size: 64
# Quoted like the other path values in this file; the parsed string is
# unchanged.
save_dir: './saved_models/chemlactica1p3B/tdc/'

# Data configuration
data:
  # NOTE(review): the leading "PATH/" looks like a placeholder for a real
  # root directory — confirm it is substituted before use.
  path: 'PATH/tdc_dataset.parquet'
  # Two property columns are listed, matching cbm.num_properties (2).
  property_columns:
    - 'QED'
    - 'LOGP'
  # 0.8 + 0.1 leaves 0.1 unassigned — presumably the test split; confirm.
  train_ratio: 0.8
  val_ratio: 0.1
  # presumably a cap on tokenized sequence length — TODO confirm
  max_length: 512

# Wandb configuration
use_wandb: true
wandb:
  project: 'cbm-rep-eng'
  # NOTE(review): "teschemlactica" may be a typo (e.g. for
  # "test-chemlactica"); value left as-is — confirm the intended run name.
  name: 'cbm-teschemlactica1p3B-tdc'
  dir: './wandb'

# General settings
seed: 42
# Quoted to make the string type explicit; how "auto" is resolved to a
# concrete device is decided by the consuming code (not visible here).
device: 'auto'