{"@context":{"@language":"en","@vocab":"https://schema.org/","arrayShape":"cr:arrayShape","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isArray":"cr:isArray","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git 
repository.","contentUrl":"https://huggingface.co/datasets/freshstack/corpus-oct-2024/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-angular","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"angular/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-godot","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"godot/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-langchain","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"langchain/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-laravel","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"laravel/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-yolo","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"yolo/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"angular_splits/split_name"},"@id":"angular_splits","name":"angular_splits","description":"Splits for the angular config.","field":[{"@type":"cr:Field","@id":"angular_splits/split_name","dataType":"sc:Text"}],"data":[{"angular_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"angular","description":"freshstack/corpus-oct-2024 - 'angular' 
subset","field":[{"@type":"cr:Field","@id":"angular/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-angular"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"angular/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"angular_splits/split_name"}}},{"@type":"cr:Field","@id":"angular/_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-angular"},"extract":{"column":"_id"}}},{"@type":"cr:Field","@id":"angular/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-angular"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"angular/metadata","subField":[{"@type":"cr:Field","@id":"angular/metadata/end_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-angular"},"extract":{"column":"metadata"},"transform":{"jsonPath":"end_byte"}}},{"@type":"cr:Field","@id":"angular/metadata/start_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-angular"},"extract":{"column":"metadata"},"transform":{"jsonPath":"start_byte"}}},{"@type":"cr:Field","@id":"angular/metadata/url","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-angular"},"extract":{"column":"metadata"},"transform":{"jsonPath":"url"}}}]}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"godot_splits/split_name"},"@id":"godot_splits","name":"godot_splits","description":"Splits for the godot config.","field":[{"@type":"cr:Field","@id":"godot_splits/split_name","dataType":"sc:Text"}],"data":[{"godot_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"godot","description":"freshstack/corpus-oct-2024 - 'godot' 
subset","field":[{"@type":"cr:Field","@id":"godot/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-godot"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"godot/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"godot_splits/split_name"}}},{"@type":"cr:Field","@id":"godot/_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-godot"},"extract":{"column":"_id"}}},{"@type":"cr:Field","@id":"godot/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-godot"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"godot/metadata","subField":[{"@type":"cr:Field","@id":"godot/metadata/end_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-godot"},"extract":{"column":"metadata"},"transform":{"jsonPath":"end_byte"}}},{"@type":"cr:Field","@id":"godot/metadata/start_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-godot"},"extract":{"column":"metadata"},"transform":{"jsonPath":"start_byte"}}},{"@type":"cr:Field","@id":"godot/metadata/url","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-godot"},"extract":{"column":"metadata"},"transform":{"jsonPath":"url"}}}]}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"langchain_splits/split_name"},"@id":"langchain_splits","name":"langchain_splits","description":"Splits for the langchain config.","field":[{"@type":"cr:Field","@id":"langchain_splits/split_name","dataType":"sc:Text"}],"data":[{"langchain_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"langchain","description":"freshstack/corpus-oct-2024 - 'langchain' 
subset","field":[{"@type":"cr:Field","@id":"langchain/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-langchain"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"langchain/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"langchain_splits/split_name"}}},{"@type":"cr:Field","@id":"langchain/_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-langchain"},"extract":{"column":"_id"}}},{"@type":"cr:Field","@id":"langchain/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-langchain"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"langchain/metadata","subField":[{"@type":"cr:Field","@id":"langchain/metadata/end_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-langchain"},"extract":{"column":"metadata"},"transform":{"jsonPath":"end_byte"}}},{"@type":"cr:Field","@id":"langchain/metadata/start_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-langchain"},"extract":{"column":"metadata"},"transform":{"jsonPath":"start_byte"}}},{"@type":"cr:Field","@id":"langchain/metadata/url","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-langchain"},"extract":{"column":"metadata"},"transform":{"jsonPath":"url"}}}]}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"laravel_splits/split_name"},"@id":"laravel_splits","name":"laravel_splits","description":"Splits for the laravel config.","field":[{"@type":"cr:Field","@id":"laravel_splits/split_name","dataType":"sc:Text"}],"data":[{"laravel_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"laravel","description":"freshstack/corpus-oct-2024 - 'laravel' 
subset","field":[{"@type":"cr:Field","@id":"laravel/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-laravel"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"laravel/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"laravel_splits/split_name"}}},{"@type":"cr:Field","@id":"laravel/_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-laravel"},"extract":{"column":"_id"}}},{"@type":"cr:Field","@id":"laravel/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-laravel"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"laravel/metadata","subField":[{"@type":"cr:Field","@id":"laravel/metadata/end_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-laravel"},"extract":{"column":"metadata"},"transform":{"jsonPath":"end_byte"}}},{"@type":"cr:Field","@id":"laravel/metadata/start_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-laravel"},"extract":{"column":"metadata"},"transform":{"jsonPath":"start_byte"}}},{"@type":"cr:Field","@id":"laravel/metadata/url","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-laravel"},"extract":{"column":"metadata"},"transform":{"jsonPath":"url"}}}]}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"yolo_splits/split_name"},"@id":"yolo_splits","name":"yolo_splits","description":"Splits for the yolo config.","field":[{"@type":"cr:Field","@id":"yolo_splits/split_name","dataType":"sc:Text"}],"data":[{"yolo_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"yolo","description":"freshstack/corpus-oct-2024 - 'yolo' 
subset","field":[{"@type":"cr:Field","@id":"yolo/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-yolo"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"yolo/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"yolo_splits/split_name"}}},{"@type":"cr:Field","@id":"yolo/_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-yolo"},"extract":{"column":"_id"}}},{"@type":"cr:Field","@id":"yolo/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-yolo"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"yolo/metadata","subField":[{"@type":"cr:Field","@id":"yolo/metadata/end_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-yolo"},"extract":{"column":"metadata"},"transform":{"jsonPath":"end_byte"}}},{"@type":"cr:Field","@id":"yolo/metadata/start_byte","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-yolo"},"extract":{"column":"metadata"},"transform":{"jsonPath":"start_byte"}}},{"@type":"cr:Field","@id":"yolo/metadata/url","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-yolo"},"extract":{"column":"metadata"},"transform":{"jsonPath":"url"}}}]}]}],"conformsTo":"http://mlcommons.org/croissant/1.1","name":"corpus-oct-2024","description":"Dataset Card for FreshStack (Corpus)\n\nHomepage | Repository | Paper\n\nFreshStack is a holistic framework to construct challenging IR/RAG evaluation datasets that focuses on search across niche and recent topics.\nThis dataset (October 2024) contains the queries, nuggets, answers and nugget-level relevance judgments of 5 niche topics focused on software engineering and machine learning. 
\nThe queries and accepted answers are taken from Stack Overflow; GPT-4o generates the nuggets and… See the full description on the dataset page: https://huggingface.co/datasets/freshstack/corpus-oct-2024.","alternateName":["freshstack/corpus-oct-2024"],"creator":{"@type":"Organization","name":"FreshStack","url":"https://huggingface.co/freshstack"},"keywords":["question-answering","cc-by-sa-4.0","100K - 1M","parquet","Text","Datasets","pandas","Croissant","Polars","arxiv:2504.13128","🇺🇸 Region: US"],"license":"https://choosealicense.com/licenses/cc-by-sa-4.0/","url":"https://huggingface.co/datasets/freshstack/corpus-oct-2024"}