{
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "citeAs": "cr:citeAs",
    "column": "cr:column",
    "conformsTo": "dct:conformsTo",
    "cr": "http://mlcommons.org/croissant/",
    "rai": "http://mlcommons.org/croissant/RAI/",
    "data": {
      "@id": "cr:data",
      "@type": "@json"
    },
    "dataType": {
      "@id": "cr:dataType",
      "@type": "@vocab"
    },
    "dct": "http://purl.org/dc/terms/",
    "examples": {
      "@id": "cr:examples",
      "@type": "@json"
    },
    "extract": "cr:extract",
    "field": "cr:field",
    "fileProperty": "cr:fileProperty",
    "fileObject": "cr:fileObject",
    "fileSet": "cr:fileSet",
    "format": "cr:format",
    "includes": "cr:includes",
    "isLiveDataset": "cr:isLiveDataset",
    "jsonPath": "cr:jsonPath",
    "key": "cr:key",
    "md5": "cr:md5",
    "parentField": "cr:parentField",
    "path": "cr:path",
    "recordSet": "cr:recordSet",
    "references": "cr:references",
    "regex": "cr:regex",
    "repeated": "cr:repeated",
    "replace": "cr:replace",
    "sc": "https://schema.org/",
    "separator": "cr:separator",
    "source": "cr:source",
    "subField": "cr:subField",
    "transform": "cr:transform"
  },
  "@type": "sc:Dataset",
  "name": "oligogym-Martinelli_2023_1_processed-dataset",
  "description": "A curated siRNA potency dataset that contain only sugar and base information.",
  "conformsTo": "http://mlcommons.org/croissant/1.0",
  "license": "Apache-2.0",
  "url": "https://github.com/Roche/oligogym/tree/main/oligogym/resources/pkg_dataset/Martinelli_2023_1_processed.csv.gz",
  "distribution": [
    {
      "@type": "cr:FileObject",
      "@id": "oligogym-github-repository",
      "name": "oligogym-github-repository",
      "description": "Roche repository on GitHub for oligogym, containing various datasets for oligonucleotide drug discovery.",
      "contentUrl": "https://github.com/Roche/oligogym",
      "encodingFormat": "git+https",
      "sha256": "main"
    },
    {
      "@type": "cr:FileSet",
      "@id": "fileset-Martinelli_2023_1_processed-csv-gz",
      "name": "Martinelli_2023_1_processed_csv_gz_file",
      "description": "GZIP-compressed CSV file: Martinelli_2023_1_processed.csv.gz for the Martinelli_2023_1_processed dataset, hosted on the oligogym GitHub repository.",
      "containedIn": {
        "@id": "oligogym-github-repository"
      },
      "encodingFormat": [
        "text/csv"
      ],
      "includes": "oligogym/resources/pkg_dataset/Martinelli_2023_1_processed.csv.gz"
    }
  ],
  "recordSet": [
    {
      "@type": "cr:RecordSet",
      "@id": "recordset-Martinelli_2023_1_processed",
      "name": "Martinelli_2023_1_processed",
      "description": "A curated siRNA potency dataset that contain only sugar and base information.",
      "field": [
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-x",
          "name": "x",
          "description": "Input sequence stored as HELM notation.",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "column": "x"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-y",
          "name": "y",
          "description": "Percentage knockdown of the target mRNA",
          "dataType": "sc:Float",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "column": "y"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-y_raw",
          "name": "y_raw",
          "description": "Raw target variable.",
          "dataType": "sc:Float",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "column": "y_raw"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-targets",
          "name": "targets",
          "description": "Target identifiers or categories.",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "column": "targets"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-smiles",
          "name": "smiles",
          "description": "SMILES representation of the molecule.",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "column": "smiles"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-fasta",
          "name": "fasta",
          "description": "FASTA sequence.",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "column": "fasta"
            }
          }
        },
        {
          "@type": "cr:Field",
          "@id": "field-Martinelli_2023_1_processed-csv_gz_filename",
          "name": "csv_gz_filename",
          "description": "The name of the source GZIP-compressed CSV file for this record.",
          "dataType": "sc:Text",
          "source": {
            "fileSet": {
              "@id": "fileset-Martinelli_2023_1_processed-csv-gz"
            },
            "extract": {
              "fileProperty": "filename"
            }
          }
        }
      ]
    }
  ]
}
