{"@context":{"@language":"en","@vocab":"https://schema.org/","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git 
repository.","contentUrl":"https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-default","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"default/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-full_files","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"full_files/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-labels","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"labels/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-retrieval_bm25","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"retrieval_bm25/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"default_splits/split_name"},"@id":"default_splits","name":"default_splits","description":"Splits for the default config.","field":[{"@type":"cr:Field","@id":"default_splits/split_name","dataType":"sc:Text"}],"data":[{"default_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"default","description":"JetBrains-Research/lca-commit-message-generation - 'default' 
subset","field":[{"@type":"cr:Field","@id":"default/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"default/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"default_splits/split_name"}}},{"@type":"cr:Field","@id":"default/hash","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"hash"}}},{"@type":"cr:Field","@id":"default/repo","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"repo"}}},{"@type":"cr:Field","@id":"default/date","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"date"}}},{"@type":"cr:Field","@id":"default/license","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"license"}}},{"@type":"cr:Field","@id":"default/message","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"message"}}},{"@type":"cr:Field","@id":"default/mods","subField":[{"@type":"cr:Field","@id":"default/mods/change_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"mods"},"transform":{"jsonPath":"change_type"}}},{"@type":"cr:Field","@id":"default/mods/old_path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"mods"},"transform":{"jsonPath":"old_path"}}},{"@type":"cr:Field","@id":"default/mods/new_path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"mods"},"transform":{"jsonPath":"new_path"}}},{"@type":"cr:Field","@id":"default/mods/diff","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"mods"},"transform":{"jsonPath":"diff"}}}],"repeated":true}]},{"@type":"cr:RecordSet","dataType":"c
r:Split","key":{"@id":"full_files_splits/split_name"},"@id":"full_files_splits","name":"full_files_splits","description":"Splits for the full_files config.","field":[{"@type":"cr:Field","@id":"full_files_splits/split_name","dataType":"sc:Text"}],"data":[{"full_files_splits/split_name":"4k"},{"full_files_splits/split_name":"8k"},{"full_files_splits/split_name":"16k"},{"full_files_splits/split_name":"32k"},{"full_files_splits/split_name":"64k"},{"full_files_splits/split_name":"full"}]},{"@type":"cr:RecordSet","@id":"full_files","description":"JetBrains-Research/lca-commit-message-generation - 'full_files' subset\n\nAdditional information:\n- 6 splits: 4k, 8k, 16k, 32k, 64k, full","field":[{"@type":"cr:Field","@id":"full_files/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"full_files/(?:partial-)?(4k|8k|16k|32k|64k|full)/.+parquet$"}},"references":{"field":{"@id":"full_files_splits/split_name"}}},{"@type":"cr:Field","@id":"full_files/hash","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"column":"hash"}}},{"@type":"cr:Field","@id":"full_files/repo","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"column":"repo"}}},{"@type":"cr:Field","@id":"full_files/files","subField":[{"@type":"cr:Field","@id":"full_files/files/new_contents","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"column":"files"},"transform":{"jsonPath":"new_contents"}}},{"@type":"cr:Field","@id":"full_files/files/new_path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"column":"files"},"transform":{"jsonPath":"new_path"}}},{"@type":"cr:Field","@id":"full_files/files/old_contents","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"column":"files"},"transfor
m":{"jsonPath":"old_contents"}}},{"@type":"cr:Field","@id":"full_files/files/old_path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-full_files"},"extract":{"column":"files"},"transform":{"jsonPath":"old_path"}}}],"repeated":true}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"labels_splits/split_name"},"@id":"labels_splits","name":"labels_splits","description":"Splits for the labels config.","field":[{"@type":"cr:Field","@id":"labels_splits/split_name","dataType":"sc:Text"}],"data":[{"labels_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"labels","description":"JetBrains-Research/lca-commit-message-generation - 'labels' subset\n\nAdditional information:\n- 1 skipped column: label","field":[{"@type":"cr:Field","@id":"labels/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"labels/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"labels_splits/split_name"}}},{"@type":"cr:Field","@id":"labels/hash","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"column":"hash"}}},{"@type":"cr:Field","@id":"labels/repo","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"column":"repo"}}},{"@type":"cr:Field","@id":"labels/date","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"column":"date"}}},{"@type":"cr:Field","@id":"labels/license","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"column":"license"}}},{"@type":"cr:Field","@id":"labels/message","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"column":"message"}}},{"@type":"cr:Field","@id":"labels/comment","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-labels"},"extract":{"column":"comment"}}}]},{"@type":"cr:RecordSet"
,"dataType":"cr:Split","key":{"@id":"retrieval_bm25_splits/split_name"},"@id":"retrieval_bm25_splits","name":"retrieval_bm25_splits","description":"Splits for the retrieval_bm25 config.","field":[{"@type":"cr:Field","@id":"retrieval_bm25_splits/split_name","dataType":"sc:Text"}],"data":[{"retrieval_bm25_splits/split_name":"4k"},{"retrieval_bm25_splits/split_name":"8k"},{"retrieval_bm25_splits/split_name":"16k"},{"retrieval_bm25_splits/split_name":"32k"},{"retrieval_bm25_splits/split_name":"64k"}]},{"@type":"cr:RecordSet","@id":"retrieval_bm25","description":"JetBrains-Research/lca-commit-message-generation - 'retrieval_bm25' subset\n\nAdditional information:\n- 5 splits: 4k, 8k, 16k, 32k, 64k","field":[{"@type":"cr:Field","@id":"retrieval_bm25/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-retrieval_bm25"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"retrieval_bm25/(?:partial-)?(4k|8k|16k|32k|64k)/.+parquet$"}},"references":{"field":{"@id":"retrieval_bm25_splits/split_name"}}},{"@type":"cr:Field","@id":"retrieval_bm25/hash","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-retrieval_bm25"},"extract":{"column":"hash"}}},{"@type":"cr:Field","@id":"retrieval_bm25/repo","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-retrieval_bm25"},"extract":{"column":"repo"}}},{"@type":"cr:Field","@id":"retrieval_bm25/mods","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-retrieval_bm25"},"extract":{"column":"mods"}}},{"@type":"cr:Field","@id":"retrieval_bm25/context","subField":[{"@type":"cr:Field","@id":"retrieval_bm25/context/source","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-retrieval_bm25"},"extract":{"column":"context"},"transform":{"jsonPath":"source"}}},{"@type":"cr:Field","@id":"retrieval_bm25/context/content","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-retrieval_bm25"},"extract":{"column":"co
ntext"},"transform":{"jsonPath":"content"}}}],"repeated":true}]}],"conformsTo":"http://mlcommons.org/croissant/1.0","name":"lca-commit-message-generation","identifier":"10.57967/hf/2513","description":"🏟️ Long Code Arena (Commit message generation)\n\nThis is the benchmark for the Commit message generation task as part of the\n🏟️ Long Code Arena benchmark.\nThe dataset is a manually curated subset of the Python test set from the 🤗 CommitChronicle dataset, tailored for larger commits.\nAll the repositories are published under permissive licenses (MIT, Apache-2.0, and BSD-3-Clause). The datapoints can be removed upon request.\n\nHow-to\n\nfrom datasets import load_dataset… See the full description on the dataset page: https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation.","alternateName":["JetBrains-Research/lca-commit-message-generation"],"creator":{"@type":"Organization","name":"JetBrains Research","url":"https://huggingface.co/JetBrains-Research"},"keywords":["apache-2.0","1K - 10K","parquet","Text","Datasets","pandas","Croissant","Polars","arxiv:2406.11612","doi:10.57967/hf/2513","🇺🇸 Region: US"],"license":"https://choosealicense.com/licenses/apache-2.0/","url":"https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation"}