# Identification of this model entry: name, key, version and dates.
model_name: CGCNN+P
model_key: cgcnn+p
model_version: v0.1.0 # the aviary version
# Both dates are quoted, so they load as plain strings rather than YAML date objects.
date_added: '2023-02-03'
date_published: '2022-02-28'
# People listed as authors. Every entry carries name, affiliation and ORCID;
# the last entry additionally carries a url and an email.
authors:
  - name: Jason B. Gibson
    affiliation: University of Florida
    orcid: https://orcid.org/0000-0001-7974-5264
  - name: Ajinkya C. Hire
    affiliation: University of Florida
    orcid: https://orcid.org/0000-0003-3147-2521
  - name: Richard G. Hennig
    affiliation: University of Florida
    url: https://hennig.mse.ufl.edu
    email: rhennig@ufl.edu
    orcid: https://orcid.org/0000-0003-4933-7686
# Who trained the model recorded in this entry (a list, here with one person).
# The affiliation value holds two institutions in a single comma-separated string.
trained_by:
  - name: Janosh Riebesell
    affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
    email: janosh.riebesell@gmail.com
    orcid: https://orcid.org/0000-0001-5233-3462

# External links: source repository, DOI of the journal article, arXiv preprint,
# a pull request in the benchmark repository (presumably the one that added this
# entry -- confirm), and the stored checkpoint file.
repo: https://github.com/JasonGibsonUfl/Augmented_CGCNN
doi: https://doi.org/10.1038/s41524-022-00891-8
paper: https://arxiv.org/abs/2202.13947
pr_url: https://github.com/janosh/matbench-discovery/pull/85
checkpoint_url: https://api.wandb.ai/files/janosh/matbench-discovery/tx6cepg6/checkpoint.pth

# License of the code and of the checkpoint, each with a commit-pinned URL to
# the license file.
# NOTE(review): code_url points at the aviary repository rather than the `repo`
# URL of this entry; this matches model_version being the aviary version.
license:
  code: MIT
  code_url: https://github.com/CompRhys/aviary/blob/3238fb415/LICENSE
  checkpoint: MIT
  checkpoint_url: https://github.com/janosh/matbench-discovery/blob/7c0b089e7/license

# Package requirements. aviary is given as a release URL; the others are
# three-component versions, which YAML loads as strings (not floats) because
# they contain two dots.
requirements:
  aviary: https://github.com/CompRhys/aviary/releases/tag/v0.1.0
  torch: 1.11.0
  torch-scatter: 2.0.9
  numpy: 1.24.0
  pandas: 1.5.1

# High-level descriptors of the model and of how it was trained and tested.
# The abbreviated values (OSOD, S2RE, IS2RE, E, GNN) are codes defined by the
# consuming benchmark -- see its documentation for their exact meaning.
openness: OSOD
trained_for_benchmark: true
train_task: S2RE
test_task: IS2RE
targets: E
model_type: GNN
# Written without `_` digit separators: only YAML 1.1 resolves `128_450` as an
# integer, a strict YAML 1.2 core-schema parser would load it as a string.
model_params: 128450
# Presumably the number of ensemble members (pred_col below ends in `_ens`) -- confirm.
n_estimators: 10

# Training data: a one-element list of dataset names.
training_set: [MP 2022]
# `missing` is a plain string (not YAML null); presumably a marker that no
# training cost was recorded -- confirm against the consumer's schema.
training_cost: missing

# Hyperparameters of the training run.
# NOTE(review): `Perturbations` is capitalized unlike its snake_case siblings;
# left unchanged because consumers may look the key up by this exact name.
# Its value matches the `perturb=5` fragment in metrics.discovery.pred_file.
hyperparams:
  Perturbations: 5
  graph_construction_radius: 5.0 # Å, from https://github.com/CompRhys/aviary/blob/451f5739/aviary/cgcnn/data.py#L28
  # `.inf` is YAML's floating-point infinity; presumably it means the neighbor
  # count is not capped and only the radius limits graph edges -- confirm.
  max_neighbors: .inf # CGCNN paper benchmarks 6.0 Å cutoff radius vs. 12 NNs graph construction

# Free-text notes about the model.
# `Description` is a literal block scalar (`|`): its line breaks are preserved
# and its second line is a Markdown image link. `Long` is a single-line plain
# scalar. Do not add `#` comments inside the block scalar -- they would become
# part of the text.
notes:
  Description: |
    This work proposes simple structure perturbations to augment CGCNN's training data of relaxed structures with randomly perturbed ones resembling unrelaxed structures that are mapped to the same DFT final energy during training.
    ![Step function PES](https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41524-022-00891-8/MediaObjects/41524_2022_891_Fig1_HTML.png?as=webp)
  Long: The model is essentially taught the potential energy surface (PES) is a step-function that maps each valley to its local minimum. The expectation is that during testing on unrelaxed structures, the model will predict the energy of the nearest basin in the PES. The authors confirm this by demonstrating a lowering of the energy error on unrelaxed structures.

# Benchmark results per task. Tasks without results for this model hold the
# string `not applicable` instead of a mapping.
metrics:
  phonons: not applicable # model doesn't predict forces
  geo_opt: not applicable
  discovery:
    # Location of the predictions file (path and download URL) and the name of
    # the column holding the predicted values.
    pred_file: models/cgcnn/2023-02-05-cgcnn-perturb=5-wbm-IS2RE.csv.gz
    pred_file_url: https://figshare.com/files/51607274
    pred_col: e_form_per_atom_cgcnn_pred_ens
    # Each subset below reports the same metric keys. Within every subset the
    # values satisfy Precision = TP / (TP + FP), Recall = TPR = TP / (TP + FN),
    # FPR = FP / (FP + TN), TNR = 1 - FPR and FNR = 1 - TPR.
    # The four confusion-matrix counts of this subset sum to 256,963.
    full_test_set:
      F1: 0.51 # fraction
      DAF: 2.398 # dimensionless
      Precision: 0.411 # fraction
      Recall: 0.67 # fraction
      Accuracy: 0.779 # fraction
      TPR: 0.67 # fraction
      FPR: 0.199 # fraction
      TNR: 0.801 # fraction
      FNR: 0.33 # fraction
      TP: 29557.0 # count
      FP: 42281.0 # count
      TN: 170590.0 # count
      FN: 14535.0 # count
      MAE: 0.108 # eV/atom
      RMSE: 0.178 # eV/atom
      R2: 0.027 # dimensionless
      missing_preds: 4 # count
    # Here TP + FP = 10,000 and TN = FN = 0, i.e. every entry of this subset
    # counts as a positive prediction; hence Recall = TPR = FPR = 1.0 and
    # Accuracy equals Precision.
    most_stable_10k:
      F1: 0.736 # fraction
      DAF: 3.813 # dimensionless
      Precision: 0.583 # fraction
      Recall: 1.0 # fraction
      Accuracy: 0.583 # fraction
      TPR: 1.0 # fraction
      FPR: 1.0 # fraction
      TNR: 0.0 # fraction
      FNR: 0.0 # fraction
      TP: 5829.0 # count
      FP: 4171.0 # count
      TN: 0.0 # count
      FN: 0.0 # count
      MAE: 0.2 # eV/atom
      RMSE: 0.275 # eV/atom
      R2: -0.076 # dimensionless
      missing_preds: 0 # count
    # The four confusion-matrix counts of this subset sum to 215,488.
    # Presumably the test set restricted to entries with a unique structure
    # prototype -- confirm against the benchmark documentation.
    unique_prototypes:
      F1: 0.5 # fraction
      DAF: 2.563 # dimensionless
      Precision: 0.392 # fraction
      Recall: 0.693 # fraction
      Accuracy: 0.786 # fraction
      TPR: 0.693 # fraction
      FPR: 0.197 # fraction
      TNR: 0.803 # fraction
      FNR: 0.307 # fraction
      TP: 23117.0 # count
      FP: 35893.0 # count
      TN: 146221.0 # count
      FN: 10257.0 # count
      MAE: 0.113 # eV/atom
      RMSE: 0.182 # eV/atom
      R2: 0.019 # dimensionless
      missing_preds: 2 # count
