{
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "arrayShape": "cr:arrayShape",
    "rai": "http://mlcommons.org/croissant/RAI/",
    "citeAs": "cr:citeAs",
    "column": "cr:column",
    "conformsTo": "dct:conformsTo",
    "cr": "http://mlcommons.org/croissant/",
    "data": {
      "@id": "cr:data",
      "@type": "@json"
    },
    "dataBiases": "cr:dataBiases",
    "dataCollection": "cr:dataCollection",
    "dataType": {
      "@id": "cr:dataType",
      "@type": "@vocab"
    },
    "dct": "http://purl.org/dc/terms/",
    "extract": "cr:extract",
    "field": "cr:field",
    "fileProperty": "cr:fileProperty",
    "fileObject": "cr:fileObject",
    "fileSet": "cr:fileSet",
    "format": "cr:format",
    "includes": "cr:includes",
    "isArray": "cr:isArray",
    "isLiveDataset": "cr:isLiveDataset",
    "jsonPath": "cr:jsonPath",
    "key": "cr:key",
    "md5": "cr:md5",
    "parentField": "cr:parentField",
    "path": "cr:path",
    "personalSensitiveInformation": "cr:personalSensitiveInformation",
    "recordSet": "cr:recordSet",
    "references": "cr:references",
    "regex": "cr:regex",
    "repeated": "cr:repeated",
    "replace": "cr:replace",
    "sc": "https://schema.org/",
    "separator": "cr:separator",
    "source": "cr:source",
    "subField": "cr:subField",
    "transform": "cr:transform"
  },
  "@type": "sc:Dataset",
  "distribution": [
    {
      "@type": "cr:FileObject",
      "@id": "repo",
      "name": "repo",
      "description": "The Hugging Face git repository.",
      "contentUrl": "https://huggingface.co/datasets/ai-chem/Synergy/tree/refs%2Fconvert%2Fparquet",
      "encodingFormat": "git+https",
      "sha256": "https://github.com/mlcommons/croissant/issues/80"
    },
    {
      "@type": "cr:FileSet",
      "@id": "parquet-files-for-config-default",
      "containedIn": {
        "@id": "repo"
      },
      "encodingFormat": "application/x-parquet",
      "includes": "default/*/*.parquet"
    }
  ],
  "recordSet": [
    {
      "@type": "cr:RecordSet",
      "dataType": "cr:Split",
      "key": {
        "@id": "default_splits/split_name"
      },
      "@id": "default_splits",
      "name": "default_splits",
      "description": "Splits for the default config.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "default_splits/split_name",
          "dataType": "sc:Text"
        }
      ],
      "data": [
        {
          "default_splits/split_name": "train"
        }
      ]
    },
    {
      "@type": "cr:RecordSet",
      "@id": "default",
      "description": "ai-chem/Synergy - 'default' subset",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "default/split",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "fileProperty": "fullpath"
            },
            "transform": {
              "regex": "default/(?:partial-)?(train)/.+parquet$"
            }
          },
          "references": {
            "field": {
              "@id": "default_splits/split_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/sn",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "sn"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/NP",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "NP"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/bacteria",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "bacteria"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/strain",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "strain"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/NP_synthesis",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "NP_synthesis"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/drug",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "drug"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/drug_dose__g_disk",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "drug_dose_µg_disk"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/NP_concentration__g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "NP_concentration_µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/NP_size_min_nm",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "NP_size_min_nm"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/NP_size_max_nm",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "NP_size_max_nm"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/NP_size_avg_nm",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "NP_size_avg_nm"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/shape",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "shape"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/method",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "method"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/ZOI_drug_mm_or_MIC___g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "ZOI_drug_mm_or_MIC _µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/error_ZOI_drug_mm_or_MIC__g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "error_ZOI_drug_mm_or_MIC_µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/ZOI_NP_mm_or_MIC_np__g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "ZOI_NP_mm_or_MIC_np_µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/error_ZOI_NP_mm_or_MIC_np__g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "error_ZOI_NP_mm_or_MIC_np_µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/ZOI_drug_NP_mm_or_MIC_drug_NP__g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "ZOI_drug_NP_mm_or_MIC_drug_NP_µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/error_ZOI_drug_NP_mm_or_MIC_drug_NP__g_ml",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "error_ZOI_drug_NP_mm_or_MIC_drug_NP_µg_ml"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/fold_increase_in_antibacterial_activity",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "fold_increase_in_antibacterial_activity"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/zeta_potential_mV",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "zeta_potential_mV"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/MDR",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "MDR"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/FIC",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "FIC"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/effect",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "effect"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/reference",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "reference"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/doi",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "doi"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/article_list",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "article_list"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/time_hr",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "time_hr"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/coating_with_antimicrobial_peptide_polymers",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "coating_with_antimicrobial_peptide_polymers"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/combined_MIC",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "combined_MIC"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/peptide_MIC",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "peptide_MIC"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/viability__",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "viability_%"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/viability_error",
          "dataType": "cr:Float64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "viability_error"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/journal_name",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "journal_name"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/publisher",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "publisher"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/year",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "year"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/title",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "title"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/journal_is_oa",
          "dataType": "sc:Boolean",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "journal_is_oa"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/is_oa",
          "dataType": "sc:Boolean",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "is_oa"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/oa_status",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "oa_status"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/pdf",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "pdf"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "default/access",
          "dataType": "cr:Int64",
          "source": {
            "fileSet": {
              "@id": "parquet-files-for-config-default"
            },
            "extract": {
              "column": "access"
            }
          }
        }
      ]
    }
  ],
  "conformsTo": "http://mlcommons.org/croissant/1.1",
  "name": "Synergy",
  "identifier": "10.57967/hf/5575",
  "description": "ai-chem/Synergy dataset hosted on Hugging Face and contributed by the HF Datasets community",
  "alternateName": [
    "ai-chem/Synergy"
  ],
  "creator": {
    "@type": "Organization",
    "name": "Center for AI in Chemistry @ ITMO University",
    "url": "https://huggingface.co/ai-chem"
  },
  "keywords": [
    "mit",
    "1K - 10K",
    "parquet",
    "Tabular",
    "Text",
    "Datasets",
    "pandas",
    "Croissant",
    "Polars",
    "doi:10.57967/hf/5575",
    "🇺🇸 Region: US"
  ],
  "citeAs": "@misc{center_for_ai_in_chemistry_@_itmo_university_2025, author = { Center for AI in Chemistry @ ITMO University }, title = { Synergy (Revision 7d4992f) }, year = 2025, url = { https://huggingface.co/datasets/ai-chem/Synergy }, doi = { 10.57967/hf/5575 }, publisher = { Hugging Face } }",
  "version": 1.0,
  "isAccessibleForFree": true,
  "datePublished": "2025-05-15",
  "rai:dataCollection": "The data was collected from full-text articles in PDF format and supplementary materials. Extraction was performed manually by annotators with expertise in chemistry.",
  "rai:personalSensitiveInformation": "There is no personal or sensitive information in the dataset. Only depersonalized experimental data from scientific publications is used, including data on cell lines.",
  "rai:annotatorDemographics": "The datasets were annotated by Russian women, aged 22 to 25, who are master's and PhD students specializing in chemistry and data science.",
  "rai:dataBiases": "Potential biases include the uneven distribution of nanoparticles and antibiotics, the predominance of specific bacterial strains, publication bias due to reliance on previously published studies, and missing values in several columns such as MIC, zeta potential, and coating.",
  "rai:dataLimitations": "The data was obtained under in vitro laboratory conditions and does not reflect in vivo effects or behavior. Some values, such as MIC, coating, or zeta potential, may be missing, and methodological variations between publications are possible.",
  "rai:dataSocialImpact": "The dataset aims to enhance the extraction algorithms for nanoparticles and their toxicity, enabling more effective applications in medicine and environmental science.",
  "license": "https://choosealicense.com/licenses/mit/",
  "url": "https://huggingface.co/datasets/ai-chem/Synergy"
}