# Identity fields for this model record: display name, key and version.
model_name: MatterSim v1 5M
model_key: mattersim-v1-5M
model_version: v1.0.0
# Dates are quoted so they load as plain strings rather than being resolved
# to date/timestamp objects by YAML 1.1 loaders.
date_added: '2024-12-16'
date_published: '2024-05-08'
# Author list. Every entry carries `name` and `affiliation`; `email` and
# `orcid` are optional and appear only on some entries.
authors:
  - name: Han Yang
    affiliation: Microsoft Research AI for Science
    email: hanyang@microsoft.com
    orcid: https://orcid.org/0000-0002-4531-093X
  - name: Chenxi Hu
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0006-8486-9230
  - name: Yichi Zhou
    affiliation: Microsoft Research AI for Science
  - name: Xixian Liu
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0008-9215-3990
  - name: Yu Shi
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0001-9235-8963
  - name: Jielan Li
    affiliation: Microsoft Research AI for Science
    email: jielanli@microsoft.com
  - name: Guanzhi Li
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-4167-6432
  - name: Zekun Chen
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-4183-2941
  - name: Shuizhou Chen
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0005-2701-5565
  - name: Claudio Zeni
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-6334-2679
  - name: Matthew Horton
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0001-7777-8871
  - name: Robert Pinsler
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0003-1454-188X
  - name: Andrew Fowler
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-7360-3078
  - name: Daniel Zügner
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0003-1626-5065
  - name: Tian Xie
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-0987-4666
  - name: Jake Smith
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0003-0412-1312
  - name: Lixin Sun
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-7971-5222
  - name: Qian Wang
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0007-7680-4514
  - name: Lingyu Kong
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0006-2226-5730
  - name: Chang Liu
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0001-5207-5440
  - name: Hongxia Hao
    affiliation: Microsoft Research AI for Science
    email: hongxiahao@microsoft.com
    orcid: https://orcid.org/0000-0002-4382-200X
  - name: Ziheng Lu
    affiliation: Microsoft Research AI for Science
    email: zihenglu@microsoft.com
    orcid: https://orcid.org/0000-0003-2239-8526

# External links: source repository, paper (DOI and arXiv abstract page),
# a pull request URL, and the pretrained checkpoint file.
repo: https://github.com/microsoft/mattersim
doi: https://doi.org/10.48550/arXiv.2405.04967
paper: https://arxiv.org/abs/2405.04967
# NOTE(review): presumably the pull request that added this record — confirm.
pr_url: https://github.com/janosh/matbench-discovery/pull/178
# Points at the `main` branch, so the target is not pinned to a commit.
checkpoint_url: https://github.com/microsoft/mattersim/raw/refs/heads/main/pretrained_models/mattersim-v1.0.0-5M.pth

# License identifiers for the code and the checkpoint, each paired with a URL
# pinned to commit prefix d0a52e64fc of the repository.
license:
  code: MIT
  code_url: https://github.com/microsoft/mattersim/blob/d0a52e64fc/LICENSE.txt
  checkpoint: MIT
  # Nested under `license`, so this does not clash with the top-level
  # `checkpoint_url`; it links to MODEL_CARD.md, not to the weights file.
  checkpoint_url: https://github.com/microsoft/mattersim/blob/d0a52e64fc/MODEL_CARD.md

# Package/version pairs recorded for this model.
requirements:
  # NOTE(review): `Graphormer` is listed here, while notes.Description says
  # this model is based on the M3GNet architecture — confirm this entry is
  # still accurate for the 5M checkpoint.
  Graphormer: v1
  numpy: 1.26.2
  pymatgen: 2024.5.1
  # Quoted because a plain scalar starting with `>` would be read as a
  # folded block scalar indicator.
  python: '>=3.9'

# Classification fields. The codes used here (OSCD, S2EFS, IS2RE-SR, EFS_G,
# UIP) are not defined in this file; see the consuming schema for their
# meaning.
# NOTE(review): OSCD presumably reflects "open source" code with a "closed
# dataset", matching the wording in `notes` — confirm.
openness: OSCD
trained_for_benchmark: false
train_task: S2EFS
test_task: IS2RE-SR
targets: EFS_G
model_type: UIP
# `_` digit separators are part of the YAML 1.1 integer syntax.
# NOTE(review): confirm every parser that reads this file accepts them.
model_params: 4_549_142
n_estimators: 1

# Training data reference, given as a one-element flow sequence.
training_set: [MatterSim]
# `missing` loads as a plain string, not as YAML null.
# NOTE(review): presumably a placeholder for an unreported cost — confirm.
training_cost: missing

# Hyperparameters recorded for this model.
hyperparams:
  optimizer: AdamW
  # NOTE(review): ase_optimizer/cell_filter presumably describe the structure
  # relaxation setup rather than training — confirm.
  ase_optimizer: FIRE
  cell_filter: ExpCellFilter
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `5e-4` as the string '5e-4' instead of a float, whereas
  # `5.0e-4` is a float under both YAML 1.1 and YAML 1.2 resolution rules.
  learning_rate: 5.0e-4 # initial value, linearly decayed to 0
  seed: 42
  batch_size: 128
  units: 256
  graph_construction_radius: 5.0 # Å
  # `.inf` is the YAML float for positive infinity.
  # NOTE(review): presumably means no neighbor cap is applied — confirm.
  max_neighbors: .inf

# Free-text notes keyed by heading. The two `|` literal block scalars keep
# their line breaks, so each dash-prefixed list below is a single multi-line
# string, not a YAML sequence.
notes:
  Description: This is an open source version of MatterSim V1 based on M3GNet architecture.
  Training: MatterSim was trained on a large, closed dataset covering diverse combinations of 89 elements across many temperatures and pressures.
  Tested Applications: |
    - Energy, force, stress prediction
    - Molecular dynamics simulations
    - Phonons
    - Mechanical properties
    - Free energy and phase diagrams
    - Materials discovery
  Training Data Sources: |
    - Materials Project
    - Alexandria dataset
    - newly generated structures and MD trajectories

# Benchmark results grouped by task. Each task records the prediction file
# path and the figshare URL of that file; inline comments give the unit of
# each metric value.
metrics:
  phonons:
    kappa_103: # https://github.com/MPA2suite/k_SRME/pull/13
      κ_SRME: 0.574 # dimensionless
      pred_file: models/mattersim/mattersim-v1-5M/2024-12-09-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
      pred_file_url: https://figshare.com/files/52134884
  geo_opt:
    pred_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt.jsonl.gz
    pred_file_url: https://figshare.com/files/57753373
    struct_col: mattersim_5M_structure
    # One sub-mapping per symmetry precision; each has its own analysis file.
    # rmsd and n_structures are identical across the two settings.
    symprec=1e-2:
      rmsd: 0.0733 # unitless
      n_sym_ops_mae: 1.7451 # unitless
      symmetry_decrease: 0.0524 # fraction
      symmetry_match: 0.8148 # fraction
      symmetry_increase: 0.1254 # fraction
      n_structures: 256963 # count
      analysis_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt-symprec=1e-2-moyo=0.4.2.csv.gz
      analysis_file_url: https://figshare.com/files/53504675
    symprec=1e-5:
      rmsd: 0.0733 # unitless
      n_sym_ops_mae: 2.1408 # unitless
      symmetry_decrease: 0.0755 # fraction
      symmetry_match: 0.6874 # fraction
      symmetry_increase: 0.2305 # fraction
      n_structures: 256963 # count
      analysis_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt-symprec=1e-5-moyo=0.4.2.csv.gz
      analysis_file_url: https://figshare.com/files/53504678
  discovery:
    pred_file: models/mattersim/mattersim-v1-5M/2024-12-16-wbm-IS2RE.csv.gz
    # The active pred_file above belongs to this M3GNet-based model; the
    # commented-out path below is the earlier Graphormer-based MatterSim
    # prediction file that it replaced on the leaderboard.
    # pred_file: models/mattersim/2024-06-16-mattersim-graphormer-wbm-IS2RE.csv.gz
    pred_file_url: https://figshare.com/files/52057559
    pred_col: e_form_per_atom_mattersim
    # The same set of classification and regression metrics is reported for
    # three subsets: full_test_set, most_stable_10k and unique_prototypes.
    full_test_set:
      F1: 0.838 # fraction
      DAF: 5.123 # dimensionless
      Precision: 0.879 # fraction
      Recall: 0.801 # fraction
      Accuracy: 0.947 # fraction
      TPR: 0.801 # fraction
      FPR: 0.023 # fraction
      TNR: 0.977 # fraction
      FNR: 0.199 # fraction
      TP: 35309.0 # count
      FP: 4858.0 # count
      TN: 208013.0 # count
      FN: 8783.0 # count
      MAE: 0.024 # eV/atom
      RMSE: 0.069 # eV/atom
      R2: 0.854 # dimensionless
      missing_preds: 2 # count
    # TN and FN are both 0 in this subset, which is why FPR is 1.0, TNR is
    # 0.0 and Recall is 1.0 here.
    most_stable_10k:
      F1: 0.984 # fraction
      DAF: 6.339 # dimensionless
      Precision: 0.969 # fraction
      Recall: 1.0 # fraction
      Accuracy: 0.969 # fraction
      TPR: 1.0 # fraction
      FPR: 1.0 # fraction
      TNR: 0.0 # fraction
      FNR: 0.0 # fraction
      TP: 9691.0 # count
      FP: 309.0 # count
      TN: 0.0 # count
      FN: 0.0 # count
      MAE: 0.022 # eV/atom
      RMSE: 0.078 # eV/atom
      R2: 0.869 # dimensionless
      missing_preds: 0 # count
    unique_prototypes:
      F1: 0.862 # fraction
      DAF: 5.852 # dimensionless
      Precision: 0.895 # fraction
      Recall: 0.831 # fraction
      Accuracy: 0.959 # fraction
      TPR: 0.831 # fraction
      FPR: 0.018 # fraction
      TNR: 0.982 # fraction
      FNR: 0.169 # fraction
      TP: 27750.0 # count
      FP: 3268.0 # count
      TN: 178846.0 # count
      FN: 5624.0 # count
      MAE: 0.024 # eV/atom
      RMSE: 0.068 # eV/atom
      R2: 0.863 # dimensionless
      missing_preds: 0 # count
  # Only the prediction file is recorded for this task; no metric values.
  diatomics:
    pred_file: models/mattersim/mattersim-v1-5M/2025-02-19-diatomics.json.gz
    pred_file_url: https://figshare.com/files/52468178
