{"@context":{"@language":"en","@vocab":"https://schema.org/","arrayShape":"cr:arrayShape","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isArray":"cr:isArray","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"ANONYMIZED","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-default","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"default/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"default_splits/split_name"},"@id":"default_splits","name":"default_splits","description":"Splits for the default config.","field":[{"@type":"cr:Field","@id":"default_splits/split_name","dataType":"sc:Text"}],"data":[{"default_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"default","description":"ANONYMIZED - 'default' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"default/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"default/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"default_splits/split_name"}}},{"@type":"cr:Field","@id":"default/genome_name","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"genome_name"}}},{"@type":"cr:Field","@id":"default/contig_name","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"contig_name"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"default/dna_sequence","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"dna_sequence"}}},{"@type":"cr:Field","@id":"default/taxid","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"taxid"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.1","name":"bacbench-antibiotic-resistance-dna","description":"Dataset for antibiotic resistance prediction from whole-bacterial genomes (DNA)\n\nA dataset of 25,032 bacterial genomes across 39 species with antimicrobial resistance labels.\nThe genome DNA sequences have been extracted from GenBank. Each row contains a whole bacterial genome, with spaces\nseparating different contigs present in the genome.\nThe antimicrobial resistance labels have been extracted from the Antibiotic Susceptibility Test (AST) Browser (accessed 23 Oct, 2024)\nand include both… See the full description on the dataset page: ANONYMIZED.","alternateName":["ANONYMIZED"],"creator":{"@type":"Person","name":"ANONYMIZED","url":"ANONYMIZED"},"keywords":["10K - 100K","parquet","Text","Datasets","Dask","Croissant","Polars","🇺🇸 Region: US"],"url":"ANONYMIZED"}