{"@context":{"@language":"en","@vocab":"https://schema.org/","arrayShape":"cr:arrayShape","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isArray":"cr:isArray","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"https://huggingface.co/datasets/tokyotech-llm/swallow-code/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp10-direct-sgcr","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp10-direct-sgcr/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp11-scor","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp11-scor/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp2-syntax-error","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp2-syntax-error/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp3-linter","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp3-linter/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp4-comment-lang","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp4-comment-lang/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp5-sgcr","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp5-sgcr/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp6-llm-scoring","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp6-llm-scoring/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-exp7","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"exp7/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-swallow-code","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"swallow-code/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp10-direct-sgcr_splits/split_name"},"@id":"exp10-direct-sgcr_splits","name":"exp10-direct-sgcr_splits","description":"Splits for the exp10-direct-sgcr config.","field":[{"@type":"cr:Field","@id":"exp10-direct-sgcr_splits/split_name","dataType":"sc:Text"}],"data":[{"exp10-direct-sgcr_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp10-direct-sgcr","description":"tokyotech-llm/swallow-code - 'exp10-direct-sgcr' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"exp10-direct-sgcr/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp10-direct-sgcr"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp10\\-direct\\-sgcr/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp10-direct-sgcr_splits/split_name"}}},{"@type":"cr:Field","@id":"exp10-direct-sgcr/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp10-direct-sgcr"},"extract":{"column":"text"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp11-scor_splits/split_name"},"@id":"exp11-scor_splits","name":"exp11-scor_splits","description":"Splits for the exp11-scor config.","field":[{"@type":"cr:Field","@id":"exp11-scor_splits/split_name","dataType":"sc:Text"}],"data":[{"exp11-scor_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp11-scor","description":"tokyotech-llm/swallow-code - 'exp11-scor' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"exp11-scor/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp11-scor"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp11\\-scor/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp11-scor_splits/split_name"}}},{"@type":"cr:Field","@id":"exp11-scor/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp11-scor"},"extract":{"column":"text"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp2-syntax-error_splits/split_name"},"@id":"exp2-syntax-error_splits","name":"exp2-syntax-error_splits","description":"Splits for the exp2-syntax-error config.","field":[{"@type":"cr:Field","@id":"exp2-syntax-error_splits/split_name","dataType":"sc:Text"}],"data":[{"exp2-syntax-error_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp2-syntax-error","description":"tokyotech-llm/swallow-code - 'exp2-syntax-error' subset (first 5GB)\n\nAdditional information:\n- 1 skipped column: analysis_results","field":[{"@type":"cr:Field","@id":"exp2-syntax-error/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp2\\-syntax\\-error/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp2-syntax-error_splits/split_name"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/blob_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"blob_id"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"path"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/content_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"content_id"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/language","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"language"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/length_bytes","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"length_bytes"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/detected_licenses","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"detected_licenses"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"exp2-syntax-error/license_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"license_type"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/src_encoding","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"src_encoding"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/is_vendor","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"is_vendor"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/is_generated","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"is_generated"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/alphanum_fraction","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"alphanum_fraction"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/alpha_fraction","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"alpha_fraction"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/num_lines","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"num_lines"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/avg_line_length","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"avg_line_length"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/max_line_length","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"max_line_length"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/has_issues","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"has_issues"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/language_type_issue","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"language_type_issue"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"exp2-syntax-error/language_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"language_type"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/pylint_score","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"pylint_score"}}},{"@type":"cr:Field","@id":"exp2-syntax-error/pylint_output","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp2-syntax-error"},"extract":{"column":"pylint_output"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp3-linter_splits/split_name"},"@id":"exp3-linter_splits","name":"exp3-linter_splits","description":"Splits for the exp3-linter config.","field":[{"@type":"cr:Field","@id":"exp3-linter_splits/split_name","dataType":"sc:Text"}],"data":[{"exp3-linter_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp3-linter","description":"tokyotech-llm/swallow-code - 'exp3-linter' subset (first 5GB)\n\nAdditional information:\n- 1 skipped column: analysis_results","field":[{"@type":"cr:Field","@id":"exp3-linter/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp3\\-linter/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp3-linter_splits/split_name"}}},{"@type":"cr:Field","@id":"exp3-linter/blob_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"blob_id"}}},{"@type":"cr:Field","@id":"exp3-linter/path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"path"}}},{"@type":"cr:Field","@id":"exp3-linter/content_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"content_id"}}},{"@type":"cr:Field","@id":"exp3-linter/language","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"language"}}},{"@type":"cr:Field","@id":"exp3-linter/length_bytes","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"length_bytes"}}},{"@type":"cr:Field","@id":"exp3-linter/detected_licenses","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"detected_licenses"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"exp3-linter/license_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"license_type"}}},{"@type":"cr:Field","@id":"exp3-linter/src_encoding","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"src_encoding"}}},{"@type":"cr:Field","@id":"exp3-linter/is_vendor","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"is_vendor"}}},{"@type":"cr:Field","@id":"exp3-linter/is_generated","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"is_generated"}}},{"@type":"cr:Field","@id":"exp3-linter/alphanum_fraction","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"alphanum_fraction"}}},{"@type":"cr:Field","@id":"exp3-linter/alpha_fraction","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"alpha_fraction"}}},{"@type":"cr:Field","@id":"exp3-linter/num_lines","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"num_lines"}}},{"@type":"cr:Field","@id":"exp3-linter/avg_line_length","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"avg_line_length"}}},{"@type":"cr:Field","@id":"exp3-linter/max_line_length","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"max_line_length"}}},{"@type":"cr:Field","@id":"exp3-linter/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"exp3-linter/has_issues","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"has_issues"}}},{"@type":"cr:Field","@id":"exp3-linter/language_type_issue","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"language_type_issue"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"exp3-linter/language_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"language_type"}}},{"@type":"cr:Field","@id":"exp3-linter/pylint_score","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"pylint_score"}}},{"@type":"cr:Field","@id":"exp3-linter/pylint_output","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp3-linter"},"extract":{"column":"pylint_output"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp4-comment-lang_splits/split_name"},"@id":"exp4-comment-lang_splits","name":"exp4-comment-lang_splits","description":"Splits for the exp4-comment-lang config.","field":[{"@type":"cr:Field","@id":"exp4-comment-lang_splits/split_name","dataType":"sc:Text"}],"data":[{"exp4-comment-lang_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp4-comment-lang","description":"tokyotech-llm/swallow-code - 'exp4-comment-lang' subset (first 5GB)\n\nAdditional information:\n- 2 skipped columns: analysis_results, language_type_issue","field":[{"@type":"cr:Field","@id":"exp4-comment-lang/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp4\\-comment\\-lang/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp4-comment-lang_splits/split_name"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/blob_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"blob_id"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/path","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"path"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/content_id","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"content_id"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/language","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"language"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/length_bytes","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"length_bytes"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/detected_licenses","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"detected_licenses"}},"isArray":true,"arrayShape":"-1"},{"@type":"cr:Field","@id":"exp4-comment-lang/license_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"license_type"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/src_encoding","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"src_encoding"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/is_vendor","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"is_vendor"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/is_generated","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"is_generated"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/alphanum_fraction","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"alphanum_fraction"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/alpha_fraction","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"alpha_fraction"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/num_lines","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"num_lines"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/avg_line_length","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"avg_line_length"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/max_line_length","dataType":"cr:Int64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"max_line_length"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"text"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/has_issues","dataType":"sc:Boolean","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"has_issues"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/language_type","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"language_type"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/pylint_score","dataType":"cr:Float64","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"pylint_score"}}},{"@type":"cr:Field","@id":"exp4-comment-lang/pylint_output","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp4-comment-lang"},"extract":{"column":"pylint_output"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp5-sgcr_splits/split_name"},"@id":"exp5-sgcr_splits","name":"exp5-sgcr_splits","description":"Splits for the exp5-sgcr config.","field":[{"@type":"cr:Field","@id":"exp5-sgcr_splits/split_name","dataType":"sc:Text"}],"data":[{"exp5-sgcr_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp5-sgcr","description":"tokyotech-llm/swallow-code - 'exp5-sgcr' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"exp5-sgcr/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp5-sgcr"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp5\\-sgcr/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp5-sgcr_splits/split_name"}}},{"@type":"cr:Field","@id":"exp5-sgcr/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp5-sgcr"},"extract":{"column":"text"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp6-llm-scoring_splits/split_name"},"@id":"exp6-llm-scoring_splits","name":"exp6-llm-scoring_splits","description":"Splits for the exp6-llm-scoring config.","field":[{"@type":"cr:Field","@id":"exp6-llm-scoring_splits/split_name","dataType":"sc:Text"}],"data":[{"exp6-llm-scoring_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp6-llm-scoring","description":"tokyotech-llm/swallow-code - 'exp6-llm-scoring' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"exp6-llm-scoring/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp6-llm-scoring"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp6\\-llm\\-scoring/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp6-llm-scoring_splits/split_name"}}},{"@type":"cr:Field","@id":"exp6-llm-scoring/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp6-llm-scoring"},"extract":{"column":"text"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"exp7_splits/split_name"},"@id":"exp7_splits","name":"exp7_splits","description":"Splits for the exp7 config.","field":[{"@type":"cr:Field","@id":"exp7_splits/split_name","dataType":"sc:Text"}],"data":[{"exp7_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"exp7","description":"tokyotech-llm/swallow-code - 'exp7' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"exp7/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp7"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"exp7/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"exp7_splits/split_name"}}},{"@type":"cr:Field","@id":"exp7/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-exp7"},"extract":{"column":"text"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"swallow-code_splits/split_name"},"@id":"swallow-code_splits","name":"swallow-code_splits","description":"Splits for the swallow-code config.","field":[{"@type":"cr:Field","@id":"swallow-code_splits/split_name","dataType":"sc:Text"}],"data":[{"swallow-code_splits/split_name":"train"}]},{"@type":"cr:RecordSet","@id":"swallow-code","description":"tokyotech-llm/swallow-code - 'swallow-code' subset (first 5GB)","field":[{"@type":"cr:Field","@id":"swallow-code/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-swallow-code"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"swallow\\-code/(?:partial-)?(train)/.+parquet$"}},"references":{"field":{"@id":"swallow-code_splits/split_name"}}},{"@type":"cr:Field","@id":"swallow-code/text","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-swallow-code"},"extract":{"column":"text"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.1","name":"swallow-code","description":"\n\t\n\t\t\n\t\tSwallowCode\n\t\n\n\n\n\n\t\n\t\t\n\t\tNotice\n\t\n\n\nMay 21, 2025: We have deleted ablation/exp1-the-stack-v2-train-smol-ids-python because it was flagged as potentially containing unsafe data collected from the Python subset of https://huggingface.co/datasets/bigcode/the-stack-v2-train-smol-ids. However, since this dataset can be reconstructed from the-stack-v2-train-smol-ids, there is no issue in terms of reproducibility.\nMay 21, 2025: ClamAV has flagged “Win.Trojan.MSShellcode-88” in… See the full description on the dataset page: https://huggingface.co/datasets/tokyotech-llm/swallow-code.","alternateName":["tokyotech-llm/swallow-code","swallowcode"],"creator":{"@type":"Organization","name":"tokyotech-llm","url":"https://huggingface.co/tokyotech-llm"},"keywords":["text-generation","English","Japanese","llama3.3","100M - 1B","json","Tabular","Text","Datasets","Dask","Croissant","arxiv:2505.02881","🇺🇸 Region: US","code"],"license":"https://choosealicense.com/licenses/llama3.3/","url":"https://huggingface.co/datasets/tokyotech-llm/swallow-code"}
