{"@context":{"@language":"en","@vocab":"https://schema.org/","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"https://huggingface.co/datasets/flwrlabs/code-alpaca-20k/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-default","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"default/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"default_splits/split_name"},"@id":"default_splits","name":"default_splits","description":"Splits for the default config.","field":[{"@type":"cr:Field","@id":"default_splits/split_name","dataType":"sc:Text"}],"data":[{"default_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"default","description":"flwrlabs/code-alpaca-20k - 'default' subset","field":[{"@type":"cr:Field","@id":"default/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"default/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"default_splits/split_name"}}},{"@type":"cr:Field","@id":"default/instruction","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"instruction"}}},{"@type":"cr:Field","@id":"default/input","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"input"}}},{"@type":"cr:Field","@id":"default/output","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"output"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.0","name":"code-alpaca-20k","description":"\n\t\n\t\t\n\t\tDataset Card for CodeAlpaca 20K\n\t\n\nThis dataset originates from the Code Alpaca repository.\nThe CodeAlpaca 20K dataset is specifically used for training code generation models.\n\n\t\n\t\t\n\t\tDataset Details\n\t\n\n\n\t\n\t\t\n\t\tDataset Description\n\t\n\nEach sample is comprised of three columns: instruction, input and output. \n\nLanguage(s): English\nLicense: Apache-2.0 License\n\n\n\t\n\t\t\n\t\tDataset Sources\n\t\n\nThe code from the original repository was adopted to post it here. \n\nRepository:… See the full description on the dataset page: https://huggingface.co/datasets/flwrlabs/code-alpaca-20k.","alternateName":["flwrlabs/code-alpaca-20k","CodeAlpaca 20K"],"creator":{"@type":"Organization","name":"Flower Labs","url":"https://huggingface.co/flwrlabs"},"keywords":["text-generation","English","apache-2.0","10K - 100K","json","Text","Datasets","pandas","Croissant","Polars","arxiv:2007.14390","🇺🇸 Region: US","code"],"license":"https://choosealicense.com/licenses/apache-2.0/","url":"https://huggingface.co/datasets/flwrlabs/code-alpaca-20k"}
