{
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "arrayShape": "cr:arrayShape",
    "citeAs": "cr:citeAs",
    "rai": "http://mlcommons.org/croissant/RAI/",
    "column": "cr:column",
    "conformsTo": "dct:conformsTo",
    "cr": "http://mlcommons.org/croissant/",
    "data": {
      "@id": "cr:data",
      "@type": "@json"
    },
    "dataBiases": "cr:dataBiases",
    "dataCollection": "cr:dataCollection",
    "dataType": {
      "@id": "cr:dataType",
      "@type": "@vocab"
    },
    "dct": "http://purl.org/dc/terms/",
    "extract": "cr:extract",
    "field": "cr:field",
    "fileProperty": "cr:fileProperty",
    "fileObject": "cr:fileObject",
    "fileSet": "cr:fileSet",
    "format": "cr:format",
    "includes": "cr:includes",
    "isArray": "cr:isArray",
    "isLiveDataset": "cr:isLiveDataset",
    "jsonPath": "cr:jsonPath",
    "key": "cr:key",
    "md5": "cr:md5",
    "parentField": "cr:parentField",
    "path": "cr:path",
    "personalSensitiveInformation": "cr:personalSensitiveInformation",
    "recordSet": "cr:recordSet",
    "references": "cr:references",
    "regex": "cr:regex",
    "repeated": "cr:repeated",
    "replace": "cr:replace",
    "sc": "https://schema.org/",
    "separator": "cr:separator",
    "source": "cr:source",
    "subField": "cr:subField",
    "transform": "cr:transform"
  },
  "@type": "sc:Dataset",
  "distribution": [
    {
      "@type": "cr:FileObject",
      "@id": "repo",
      "name": "repo",
      "description": "The Hugging Face git repository.",
      "contentUrl": "https://huggingface.co/datasets/ai-chem/Nanozymes/tree/refs%2Fconvert%2Fparquet",
      "encodingFormat": "git+https",
      "sha256": "https://github.com/mlcommons/croissant/issues/80"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-default",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "default/*/*.parquet"
    }
  ],
  "recordSet": [
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "default_splits/split_name"
      },
      "@id": "default_splits",
      "name": "default_splits",
      "description": "Splits for the default config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "default_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "default_splits/split_name": "train"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "default",
      "description": "ai-chem/Nanozymes - 'default' subset",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "default/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "default/(?:partial-)?(train)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "default_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/formula",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "formula"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/activity",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "activity"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/syngony",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "syngony"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/length",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "length"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/width",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "width"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/depth",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "depth"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/surface",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "surface"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/km_value",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "km_value"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/km_unit",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "km_unit"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/vmax_value",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "vmax_value"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/vmax_unit",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "vmax_unit"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/target_source",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "target_source"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/reaction_type",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "reaction_type"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/c_min",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "c_min"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/c_max",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "c_max"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/c_const",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "c_const"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/c_const_unit",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "c_const_unit"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/ccat_value",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "ccat_value"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/ccat_unit",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "ccat_unit"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/ph",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "ph"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/temperature",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "temperature"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/doi",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "doi"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/pdf",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "pdf"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/access",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "access"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/journal",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "journal"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/year",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "year"
            }
          }
        }
      ]
    }
  ],
  "conformsTo": "http://mlcommons.org/croissant/1.1",
  "name": "Nanozymes",
  "identifier": "10.57967/hf/5576",
  "description": "ai-chem/Nanozymes dataset hosted on Hugging Face and contributed by the HF Datasets community",
  "alternateName": [
    "ai-chem/Nanozymes"
  ],
  "creator": {
    "@type": "Organization",
    "name": "Center for AI in Chemistry @ ITMO University",
    "url": "https://huggingface.co/ai-chem"
  },
  "keywords": [
    "mit",
    "1K - 10K",
    "parquet",
    "Tabular",
    "Text",
    "Datasets",
    "pandas",
    "Croissant",
    "Polars",
    "doi:10.57967/hf/5576",
    "🇺🇸 Region: US"
  ],
  "citeAs": "@misc{center_for_ai_in_chemistry_@_itmo_university_2025, author = { Center for AI in Chemistry @ ITMO University }, title = { Nanozymes (Revision 4f3971b) }, year = 2025, url = { https://huggingface.co/datasets/ai-chem/Nanozymes }, doi = { 10.57967/hf/5576 }, publisher = { Hugging Face } }",
  "version": "1.0.0",
  "isAccessibleForFree": true,
  "datePublished": "2025-05-15",
  "rai:dataCollection": "The data was collected from full-text articles in PDF format and supplementary materials. Extraction was performed manually by annotators with expertise in chemistry.",
  "rai:personalSensitiveInformation": "There is no personal or sensitive information in the dataset. Only depersonalized experimental data from scientific publications is used, including data on cell lines.",
  "rai:annotatorDemographics": "The datasets were annotated by Russian women, aged 22 to 25, who are master's and PhD students specializing in chemistry and data science.",
  "rai:dataBiases": "Possible biases include the dominance of repetitive articles with similar experiments over less represented cases, uneven coverage of different types of catalytic activity, undocumented reaction conditions such as surface coatings in some records, and missing values that may distort the overall representativeness.",
  "rai:dataLimitations": "The data is limited to in vitro laboratory experiments and does not cover biological or clinical application scenarios. Some values, such as syngony, were calculated or interpreted by the annotation team rather than extracted directly from the article text. Additionally, there may be missing information regarding coatings, parameters, and concentrations.",
  "rai:dataSocialImpact": "The dataset aims to enhance the extraction algorithms for nanoparticles and their catalytic properties, enabling more effective applications in medicine and chemical sciences.",
  "license": "https://choosealicense.com/licenses/mit/",
  "url": "https://huggingface.co/datasets/ai-chem/Nanozymes"
}