# Dataset and splits
# Root data folder, the dataset interface class used to load it, the dataset
# name, and the file that stores the data splits.
data_root: DATA
dataset_class: pydgn.data.dataset.TUDatasetInterface
dataset_name: NCI1
data_splits_file: DATA_SPLITS/CHEMICAL/NCI1/NCI1_outer10_inner1.splits


# Hardware
device: cuda
max_cpus: 64
max_gpus: 3
# A fractional share multiplies GPU parallelism by 4 (1 / 0.25 tasks per GPU),
# so be sure to provide enough CPUs.
gpus_per_task: 0.25


# Data loading
# Provider class that serves the dataset, plus the loader class and the
# keyword arguments listed under `args`.
dataset_getter: pydgn.data.provider.DataProvider
data_loader:
  class_name: torch_geometric.loader.DataLoader
  args:
    num_workers: 2
    pin_memory: true


# Reproducibility
# Fixed seed value for the experiment (presumably used to seed the random
# number generators; confirm against the framework).
seed: 42


# Experiment
# Output folder, experiment name, and the task class that runs it.
result_folder: GSPN_RESULTS/UNSUPERVISED
exp_name: unsupervised_embedding_classification_chemical
experiment: unsupervised_embedding_classification.ClassificationTask
# Consistent with the supervised early stopper below, which uses `mode: max`.
higher_results_are_better: true
evaluate_every: 1
final_training_runs: 3

grid:
  # Settings for the unsupervised model (model.GSPN).
  unsupervised_config:
    model: model.GSPN
    checkpoint: true
    shuffle: true
    batch_size: 32
    epochs: 500

    # ---- Model-specific arguments ----

    # Same folder name as supervised_config.embeddings_folder.
    embeddings_folder: UNSUP_GSPN_EMBEDDINGS_BASIC/

    convolution_class: model.GSPNBaseConv
    emission_class: model.GSPNCategoricalEmission
    num_mixtures: [10, 5, 20]
    num_hidden_neurons:
      - 0  # not used at the moment
    num_layers: [2, 5, 10, 15, 20]

    # ----------------------------------

    # Optimizer
    optimizer:
      - class_name: pydgn.training.callback.optimizer.Optimizer
        args:
          optimizer_class_name: torch.optim.Adam
          lr: 0.1
          # do/do not accumulate gradient across mini-batches
          accumulate_gradients: false

    # Scheduler (none configured)
    scheduler: null

    # Loss metric
    loss:
      - class_name: metric.GSPNNodeLogLikelihood
        args:
          use_nodes_batch_size: true

    # Score metric: the main score uses the same metric class as the loss.
    scorer:
      - class_name: pydgn.training.callback.metric.MultiScore
        args:
          main_scorer:
            - class_name: metric.GSPNNodeLogLikelihood
              args:
                use_nodes_batch_size: true

    # Readout (optional, none configured)
    readout: null

    # Training engine
    engine: pydgn.training.engine.TrainingEngine

    # Gradient clipper (optional, none configured)
    gradient_clipper: null

    # Early stopper
    early_stopper:
      - class_name:
          - pydgn.training.callback.early_stopping.PatienceEarlyStopper
        args:
          patience: [50]
          monitor: validation_main_score
          # NOTE(review): the monitored main score is GSPNNodeLogLikelihood and
          # it is minimized here; confirm the metric is reported as a value to
          # minimize (e.g. a negative log-likelihood).
          mode: min
          checkpoint: true

    # Plotter of metrics
    plotter: pydgn.training.callback.plotter.Plotter

  # Settings for the supervised classifier
  # (readout.MLPGraphClassifier_GlobalReadout).
  supervised_config:
    model: readout.MLPGraphClassifier_GlobalReadout
    checkpoint: true
    shuffle: true
    batch_size: 32
    epochs: 1000

    # ---- Model-specific arguments ----

    # Same folder name as unsupervised_config.embeddings_folder.
    embeddings_folder: UNSUP_GSPN_EMBEDDINGS_BASIC/

    global_pooling: sum

    hidden_units: [8, 16, 32, 128]

    # ----------------------------------

    # Optimizer
    optimizer:
      - class_name: pydgn.training.callback.optimizer.Optimizer
        args:
          optimizer_class_name: torch.optim.Adam
          lr: 0.001

    # Scheduler (none configured)
    scheduler: null

    # Loss metric
    loss: pydgn.training.callback.metric.MulticlassClassification

    # Score metric
    scorer: pydgn.training.callback.metric.MulticlassAccuracy

    # Readout (optional, none configured)
    readout: null

    # Training engine
    engine: pydgn.training.engine.TrainingEngine

    # Gradient clipper (optional, none configured)
    gradient_clipper: null

    # Early stopper: the monitored score is MulticlassAccuracy, maximized.
    early_stopper:
      - class_name: pydgn.training.callback.early_stopping.PatienceEarlyStopper
        args:
          patience: 200
          monitor: validation_main_score
          mode: max
          checkpoint: true

    # Plotter of metrics
    plotter: pydgn.training.callback.plotter.Plotter