{"@context":{"@language":"en","@vocab":"https://schema.org/","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"https://huggingface.co/datasets/JetBrains-Research/lca-module-summarization/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-default","name":"parquet-files-for-config-default","description":"The underlying Parquet files as converted by Hugging Face (see: https://huggingface.co/docs/dataset-viewer/parquet).","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"default/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"default_splits/split_name"},"@id":"default_splits","name":"default_splits","description":"Splits for the default config.","field":[{"@type":"cr:Field","@id":"default_splits/split_name","name":"split_name","description":"The name of the 
split.","dataType":"sc:Text"}],"data":[{"default_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"default","name":"default","description":"JetBrains-Research/lca-module-summarization - 'default' subset","field":[{"@type":"cr:Field","@id":"default/split","name":"default/split","description":"Split to which the example belongs.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"default/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"default_splits/split_name"}}},{"@type":"cr:Field","@id":"default/repo","name":"default/repo","description":"Column 'repo' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"repo"}}},{"@type":"cr:Field","@id":"default/docfile_name","name":"default/docfile_name","description":"Column 'docfile_name' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"docfile_name"}}},{"@type":"cr:Field","@id":"default/doc_type","name":"default/doc_type","description":"Column 'doc_type' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"doc_type"}}},{"@type":"cr:Field","@id":"default/intent","name":"default/intent","description":"Column 'intent' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"intent"}}},{"@type":"cr:Field","@id":"default/license","name":"default/license","description":"Column 'license' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"license"}}},{"@type":"cr:Field","@id":"default/path_to_docfile","name":"default/path_to_docfile","description":"Column 'path_to_docfile' 
from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"path_to_docfile"}}},{"@type":"cr:Field","@id":"default/relevant_code_files","name":"default/relevant_code_files","description":"Column 'relevant_code_files' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"relevant_code_files"}},"repeated":true},{"@type":"cr:Field","@id":"default/relevant_code_dir","name":"default/relevant_code_dir","description":"Column 'relevant_code_dir' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"relevant_code_dir"}}},{"@type":"cr:Field","@id":"default/target_text","name":"default/target_text","description":"Column 'target_text' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"target_text"}}},{"@type":"cr:Field","@id":"default/relevant_code_context","name":"default/relevant_code_context","description":"Column 'relevant_code_context' from the Hugging Face parquet file.","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-default"},"extract":{"column":"relevant_code_context"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.0","name":"lca-module-summarization","identifier":"10.57967/hf/2515","description":"🏟️ Long Code Arena (Module summarization)\n\nThis is the benchmark for the Module summarization task as part of the\n🏟️ Long Code Arena benchmark. \nThe current version includes 216 manually curated text files describing different documentation of permissively licensed open-source Python projects. 
\nThe model is required to generate such a description, given the relevant context code and the intent behind the documentation.\nAll the repositories are published under permissive licenses (MIT, Apache-2.0… See the full description on the dataset page: https://huggingface.co/datasets/JetBrains-Research/lca-module-summarization.","alternateName":["JetBrains-Research/lca-module-summarization"],"creator":{"@type":"Organization","name":"JetBrains Research","url":"https://huggingface.co/JetBrains-Research"},"keywords":["text-generation","summarization","English","apache-2.0","< 1K","parquet","Image","Text","Datasets","pandas","Croissant","Polars","arxiv:2406.11612","doi:10.57967/hf/2515","🇺🇸 Region: US"],"license":"https://choosealicense.com/licenses/apache-2.0/","url":"https://huggingface.co/datasets/JetBrains-Research/lca-module-summarization"}