# Identity of this model entry and the dates attached to it.
model_name: 'DPA3-v1-OpenLAM'
model_key: 'dpa3-v1-openlam'
# version tag, dated 2025-01-10
model_version: 'v0.1'
# Dates stay quoted so they load as strings rather than date objects.
date_added: '2025-01-10'
date_published: '2025-01-10'
# Model authors. The two entries flagged `corresponding: true` carry a contact
# email instead of an ORCID.
authors:
  - name: 'Duo Zhang'
    affiliation: 'AI for Science Institute, Beijing'
    orcid: 'https://orcid.org/0000-0001-9591-2659'
  - name: 'Anyang Peng'
    affiliation: 'AI for Science Institute, Beijing'
    orcid: 'https://orcid.org/0000-0002-0630-2187'
  - name: 'Chun Cai'
    affiliation: 'AI for Science Institute, Beijing'
    orcid: 'https://orcid.org/0000-0001-6242-0439'
  - name: 'Linfeng Zhang'
    affiliation: 'AI for Science Institute, Beijing; DP Technology'
    email: 'linfeng.zhang.zlf@gmail.com'
    corresponding: true
  - name: 'Han Wang'
    affiliation: 'Beijing Institute of Applied Physics and Computational Mathematics (IAPCM)'
    email: 'wang_han@iapcm.ac.cn'
    corresponding: true
# Person(s) who trained the model; same fields as an `authors` entry.
trained_by:
  - name: 'Anyang Peng'
    affiliation: 'AI for Science Institute, Beijing'
    orcid: 'https://orcid.org/0000-0002-0630-2187'

# Code, project and submission links.
repo: 'https://github.com/deepmodeling/deepmd-kit/tree/dpa3-alpha'
url: 'https://github.com/deepmodeling/deepmd-kit/tree/dpa3-alpha'
# NOTE(review): `doi` and `paper` are placeholders that point at the repo
# branch (neither is a DOI or a paper link); replace them once the publication
# is released.
doi: 'https://github.com/deepmodeling/deepmd-kit/tree/dpa3-alpha'
paper: 'https://github.com/deepmodeling/deepmd-kit/tree/dpa3-alpha'
pr_url: 'https://github.com/janosh/matbench-discovery/pull/192'
# checkpoints reported in https://github.com/deepmodeling/deepmd-kit/discussions/4682
checkpoint_url: 'https://bohrium-api.dp.tech/ds-dl/dpa3openlam-74ng-v3.zip'

# Licence of the code and of the released checkpoint; both URLs reference the
# same LICENSE file at a pinned commit.
license:
  code: 'LGPL-3.0'
  code_url: 'https://github.com/deepmodeling/deepmd-kit/blob/70bc6d89/LICENSE'
  checkpoint: 'LGPL-3.0'
  checkpoint_url: 'https://github.com/deepmodeling/deepmd-kit/blob/70bc6d89/LICENSE'

# Categorical descriptors of the model and of this benchmark submission.
# The short codes are defined by the consuming schema, not by this file.
openness: 'OSCD'
train_task: 'S2EFS'
test_task: 'IS2RE-SR'
targets: 'EFS_G'
model_type: 'UIP'
# Kept verbatim: the underscores are digit separators under YAML 1.1 integer
# rules (8184608); a strict YAML 1.2 core-schema parser reads this as a string.
model_params: 8_184_608
n_estimators: 1
status: 'superseded'
trained_for_benchmark: true

# Hyperparameters recorded for this submission: evaluation-time settings,
# model sizes, and the two training phases (`pretrain`, `finetune`).
hyperparams:
  # presumably the ASE structure-relaxation settings used at test time — confirm
  max_force: 0.05
  max_steps: 500
  ase_optimizer: FIRE
  cell_filter: ExpCellFilter
  # model architecture sizes and cutoffs
  # (units of e_rcut / a_rcut not stated here; presumably Å — confirm)
  n_layers: 6
  e_rcut: 6.0
  a_rcut: 4.0
  n_dim: 384
  e_dim: 96
  a_dim: 32
  optimizer: Adam
  pretrain:
    loss: MSE
    # `a -> b` values are plain strings; presumably the start -> end loss
    # prefactor over the course of training — confirm
    loss_weights:
      energy: 0.02 -> 1
      force: 1000 -> 100
      virial: 0.02 -> 1
    initial_learning_rate: 0.001
    # free-form string, not parsed into separate fields by YAML
    learning_rate_schedule: ExpLR - start_lr=0.001, decay_steps=5000, stop_lr=0.00001
    training_steps: 1600000
    batch_size: 960 # 120 (gpus) * 8 (batch per gpu) = 960 (total batch size)
    epochs: 9.4
  finetune:
    loss: Huber
    loss_weights:
      energy: 15
      force: 1
      virial: 2.5
    initial_learning_rate: 0.0001
    learning_rate_schedule: ExpLR - start_lr=0.0001, decay_steps=5000, stop_lr=0.000006
    training_steps: 400000
    batch_size: 256 # 64 (gpus) * 4 (batch per gpu) = 256 (total batch size)
    epochs: 9
    # The `*_continue` keys mirror the keys above with a higher energy weight
    # (30 vs 15) and a lower stop_lr; presumably a second finetuning stage
    # that continues from the first — confirm
    loss_continue: Huber
    loss_weights_continue:
      energy: 30
      force: 1
      virial: 2.5
    initial_learning_rate_continue: 0.0001
    learning_rate_schedule_continue: ExpLR - start_lr=0.0001, decay_steps=5000, stop_lr=0.000000001
    training_steps_continue: 600000
    batch_size_continue: 256 # 64 (gpus) * 4 (batch per gpu) = 256 (total batch size)
    epochs_continue: 13.5
  # same value as e_rcut above
  graph_construction_radius: 6.0 # Å
  max_neighbors: 120 # from https://github.com/deepmodeling/deepmd-kit/discussions/4682#discussioncomment-12836651

# Pinned package versions. Quoted so every entry loads as a string even if a
# future version has only one dot (e.g. 2.3 would otherwise become a float).
requirements:
  torch: '2.3.1'
  torch-geometric: '2.5.2'
  ase: '3.23.0'
  pymatgen: '2024.6.10'
  numpy: '1.26.4'

# Training data (a list of dataset keys) and training cost.
training_set:
  - OpenLAM
# literal string `missing`; presumably means the cost was not reported
training_cost: missing

# Free-form notes. `Description` is a literal block scalar (`|`), so its line
# breaks are preserved; the text contains a Markdown link. Do not put `#`
# comments inside the block — they would become part of the string.
notes:
  Description: |
    DPA3 is an advanced interatomic potential leveraging the message passing architecture, implemented within the DeePMD-kit framework, available on [GitHub](https://github.com/deepmodeling/deepmd-kit/tree/dpa3-alpha).
    Designed as a large atomic model (LAM), DPA3 is tailored to integrate and simultaneously train on datasets from various disciplines, encompassing diverse chemical and materials systems across different research domains.
    Its model design ensures exceptional fitting accuracy and robust generalization both within and beyond the training domain.
    Furthermore, DPA3 maintains energy conservation and respects the physical symmetries of the potential energy surface, making it a dependable tool for a wide range of scientific applications.

# Benchmark results, grouped by task (phonons, geo_opt, discovery).
metrics:
  phonons:
    # single result set keyed `kappa_103`
    kappa_103:
      κ_SRME: 0.741
      # prediction file path and a Figshare link (presumably the same file)
      pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
      pred_file_url: https://figshare.com/files/52134863
  # Geometry-optimisation results: one prediction file, analysed at two
  # `symprec` settings (the sub-mappings below). Both cover 256963 structures
  # and report the same rmsd.
  geo_opt:
    pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt.jsonl.gz
    # name of the structure column in pred_file
    struct_col: dp_structure
    pred_file_url: https://figshare.com/files/57754516
    # NOTE(review): symmetry_decrease + symmetry_match + symmetry_increase
    # sums to 0.9939 here, not 1 — confirm what the remainder represents.
    symprec=1e-5:
      analysis_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt-symprec=1e-5-moyo=0.3.3.csv.gz
      analysis_file_url: https://figshare.com/files/52291973
      rmsd: 0.0128 # unitless
      n_sym_ops_mae: 2.1477 # unitless
      symmetry_decrease: 0.0657 # fraction
      symmetry_match: 0.7188 # fraction
      symmetry_increase: 0.2094 # fraction
      n_structures: 256963 # count
    # NOTE(review): the three symmetry_* fractions sum to 0.9926 here.
    symprec=1e-2:
      analysis_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt-symprec=1e-2-moyo=0.3.3.csv.gz
      analysis_file_url: https://figshare.com/files/52291976
      rmsd: 0.0128 # unitless
      n_sym_ops_mae: 1.8912 # unitless
      symmetry_decrease: 0.0515 # fraction
      symmetry_match: 0.8097 # fraction
      symmetry_increase: 0.1314 # fraction
      n_structures: 256963 # count
  # Discovery (stability classification + energy regression) results on three
  # subsets. Each subset lists rates, raw confusion-matrix counts, and
  # regression errors; per-line comments give the unit.
  discovery:
    pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-IS2RE.csv.gz
    pred_file_url: https://figshare.com/files/52057532
    # name of the prediction column in pred_file
    pred_col: e_form_per_atom_dp
    # TP + FP + TN + FN = 256963
    full_test_set:
      F1: 0.862 # fraction
      DAF: 5.013 # dimensionless
      Precision: 0.86 # fraction
      Recall: 0.865 # fraction
      Accuracy: 0.953 # fraction
      TPR: 0.865 # fraction
      FPR: 0.029 # fraction
      TNR: 0.971 # fraction
      FNR: 0.135 # fraction
      TP: 38130.0 # count
      FP: 6197.0 # count
      TN: 206674.0 # count
      FN: 5962.0 # count
      MAE: 0.023 # eV/atom
      RMSE: 0.067 # eV/atom
      R2: 0.863 # dimensionless
      missing_preds: 2 # count
    # TP + FP = 10000 and TN = FN = 0: every entry in this subset is a
    # predicted positive, which is why Recall/TPR/FPR are 1.0 and TNR/FNR 0.0.
    most_stable_10k:
      F1: 0.987 # fraction
      DAF: 6.371 # dimensionless
      Precision: 0.974 # fraction
      Recall: 1.0 # fraction
      Accuracy: 0.974 # fraction
      TPR: 1.0 # fraction
      FPR: 1.0 # fraction
      TNR: 0.0 # fraction
      FNR: 0.0 # fraction
      TP: 9740.0 # count
      FP: 260.0 # count
      TN: 0.0 # count
      FN: 0.0 # count
      MAE: 0.019 # eV/atom
      RMSE: 0.066 # eV/atom
      R2: 0.905 # dimensionless
      missing_preds: 0 # count
    # TP + FP + TN + FN = 215488
    unique_prototypes:
      F1: 0.883 # fraction
      DAF: 5.754 # dimensionless
      Precision: 0.88 # fraction
      Recall: 0.885 # fraction
      Accuracy: 0.963 # fraction
      TPR: 0.885 # fraction
      FPR: 0.022 # fraction
      TNR: 0.978 # fraction
      FNR: 0.115 # fraction
      TP: 29549.0 # count
      FP: 4042.0 # count
      TN: 178072.0 # count
      FN: 3825.0 # count
      MAE: 0.023 # eV/atom
      RMSE: 0.067 # eV/atom
      R2: 0.869 # dimensionless
      missing_preds: 0 # count
